<!DOCTYPE html>
<html lang="zh-CN">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
  <meta name="theme-color" content="#222">
  <meta name="generator" content="Hexo 4.2.1">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/safari-pinned-tab.svg" color="#222">
  <link rel="stylesheet" href="/css/main.css">
  <link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">
  <link rel="stylesheet" href="/lib/pace/pace-theme-minimal.min.css">
  <script src="/lib/pace/pace.min.js"></script>
  <script id="hexo-configurations">
    // Theme namespace: keep window.NexT if it already exists, otherwise start empty.
    var NexT = window.NexT || {};

    // Site-wide settings emitted into the page as a global object literal.
    // The page-configurations script further down adds a `page` entry to it.
    var CONFIG = {
      "hostname": "cuiqingcai.com",
      "root": "/",
      "scheme": "Pisces",
      "version": "7.8.0",
      "exturl": false,

      // Sidebar layout plus the ordered list of sidebar widgets.
      "sidebar": {
        "position": "right",
        "width": 360,
        "display": "post",
        "padding": 18,
        "offset": 12,
        "onmobile": false,
        "widgets": [
          // Image widgets: each carries a link target (url) and a picture (src).
          {
            "type": "image",
            "name": "阿布云",
            "enable": false,
            "url": "https://www.abuyun.com/http-proxy/introduce.html",
            "src": "https://qiniu.cuiqingcai.com/88au8.jpg",
            "width": "100%"
          },
          {
            "type": "image",
            "name": "天验",
            "enable": true,
            "url": "https://tutorial.lengyue.video/?coupon=12ef4b1a-a3db-11ea-bb37-0242ac130002_cqx_850",
            "src": "https://qiniu.cuiqingcai.com/bco2a.png",
            "width": "100%"
          },
          {
            "type": "image",
            "name": "华为云",
            "enable": false,
            "url": "https://activity.huaweicloud.com/2020_618_promotion/index.html?bpName=5f9f98a29e2c40b780c1793086f29fe2&bindType=1&salesID=wangyubei",
            "src": "https://qiniu.cuiqingcai.com/y42ik.jpg",
            "width": "100%"
          },
          {
            "type": "image",
            "name": "张小鸡",
            "enable": false,
            "url": "http://www.zxiaoji.com/",
            "src": "https://qiniu.cuiqingcai.com/fm72f.png",
            "width": "100%"
          },
          {
            "type": "image",
            "name": "Luminati",
            "src": "https://qiniu.cuiqingcai.com/ikkq9.jpg",
            "url": "https://luminati-china.io/?affiliate=ref_5fbbaaa9647883f5c6f77095",
            "width": "100%",
            "enable": false
          },
          {
            "type": "image",
            "name": "IPIDEA",
            "url": "http://www.ipidea.net/?utm-source=cqc&utm-keyword=?cqc",
            "src": "https://qiniu.cuiqingcai.com/0ywun.png",
            "width": "100%",
            "enable": true
          },
          // Non-image widgets only need a type, a display name and a switch.
          { "type": "tags", "name": "标签云", "enable": true },
          { "type": "categories", "name": "分类", "enable": true },
          { "type": "friends", "name": "友情链接", "enable": true },
          { "type": "hot", "name": "猜你喜欢", "enable": true }
        ]
      },

      // Feature switches and their options.
      "copycode": { "enable": true, "show_result": true, "style": "mac" },
      "back2top": { "enable": true, "sidebar": false, "scrollpercent": true },
      "bookmark": { "enable": false, "color": "#222", "save": "auto" },
      "fancybox": false,
      "mediumzoom": false,
      "lazyload": false,
      "pangu": true,

      // Comment area settings.
      "comments": {
        "style": "tabs",
        "active": "gitalk",
        "storage": true,
        "lazyload": false,
        "nav": null,
        "activeClass": "gitalk"
      },

      // Search settings. The ${...} markers sit inside ordinary double-quoted
      // strings, so they are literal text here, not template interpolation.
      "algolia": {
        "hits": { "per_page": 10 },
        "labels": {
          "input_placeholder": "Search for Posts",
          "hits_empty": "We didn't find any results for the search: ${query}",
          "hits_stats": "${hits} results found in ${time} ms"
        }
      },
      "localsearch": {
        "enable": true,
        "trigger": "auto",
        "top_n_per_article": 10,
        "unescape": false,
        "preload": false
      },

      // Animation settings (switched off via "enable": false).
      "motion": {
        "enable": false,
        "async": false,
        "transition": {
          "post_block": "bounceDownIn",
          "post_header": "slideDownIn",
          "post_body": "slideDownIn",
          "coll_header": "slideLeftIn",
          "sidebar": "slideUpIn"
        }
      },

      "path": "search.xml"
    };

  </script>
  <meta name="description" content="原理 中文分词，即 Chinese Word Segmentation，即将一个汉字序列进行切分，得到一个个单独的词。表面上看，分词其实就是那么回事，但分词效果好不好对信息检索、实验结果还是有很大影响的，同时分词的背后其实是涉及各种各样的算法的。 中文分词与英文分词有很大的不同，对英文而言，一个单词就是一个词，而汉语是以字为基本的书写单位，词语之间没有明显的区分标记，需要人为切分。根据其特点，可以">
  <meta property="og:type" content="article">
  <meta property="og:title" content="中文分词原理及工具">
  <meta property="og:url" content="https://cuiqingcai.com/5844.html">
  <meta property="og:site_name" content="静觅">
  <meta property="og:description" content="原理 中文分词，即 Chinese Word Segmentation，即将一个汉字序列进行切分，得到一个个单独的词。表面上看，分词其实就是那么回事，但分词效果好不好对信息检索、实验结果还是有很大影响的，同时分词的背后其实是涉及各种各样的算法的。 中文分词与英文分词有很大的不同，对英文而言，一个单词就是一个词，而汉语是以字为基本的书写单位，词语之间没有明显的区分标记，需要人为切分。根据其特点，可以">
  <meta property="og:locale" content="zh_CN">
  <meta property="article:published_time" content="2018-03-15T13:32:47.000Z">
  <meta property="article:modified_time" content="2021-12-18T13:11:11.561Z">
  <meta property="article:author" content="崔庆才">
  <meta property="article:tag" content="崔庆才">
  <meta property="article:tag" content="静觅">
  <meta property="article:tag" content="PHP">
  <meta property="article:tag" content="Java">
  <meta property="article:tag" content="Python">
  <meta property="article:tag" content="Spider">
  <meta property="article:tag" content="爬虫">
  <meta property="article:tag" content="Web">
  <meta property="article:tag" content="Kubernetes">
  <meta property="article:tag" content="深度学习">
  <meta property="article:tag" content="机器学习">
  <meta property="article:tag" content="数据分析">
  <meta property="article:tag" content="网络">
  <meta property="article:tag" content="IT">
  <meta property="article:tag" content="技术">
  <meta property="article:tag" content="博客">
  <meta name="twitter:card" content="summary">
  <link rel="canonical" href="https://cuiqingcai.com/5844.html">
  <script id="page-configurations">
    // Per-page settings attached to the global CONFIG object defined earlier in <head>.
    // Variable reference: https://hexo.io/docs/variables.html
    CONFIG.page = { sidebar: "", isHome: false, isPost: true, lang: "zh-CN" };

  </script>
  <title>中文分词原理及工具 | 静觅</title>
  <meta name="google-site-verification" content="p_bIcnvirkFzG2dYKuNDivKD8-STet5W7D-01woA2fc" />
  <noscript>
    <style>
      /* These rules live inside <noscript>, so they apply only when scripting is
         disabled. They reset opacity and offsets to their initial values. */

      /* Blocks that only need their opacity reset. */
      .use-motion .brand, .use-motion .menu-item, .sidebar-inner,
      .use-motion .post-block, .use-motion .pagination, .use-motion .comments,
      .use-motion .post-header, .use-motion .post-body,
      .use-motion .collection-header {
        opacity: initial;
      }

      /* Site title and subtitle: reset opacity and vertical offset. */
      .use-motion .site-title, .use-motion .site-subtitle {
        opacity: initial;
        top: initial;
      }

      /* Logo lines: reset the horizontal offsets on each side. */
      .use-motion .logo-line-before i {
        left: initial;
      }

      .use-motion .logo-line-after i {
        right: initial;
      }

    </style>
  </noscript>
  <link rel="alternate" href="/atom.xml" title="静觅" type="application/atom+xml">
</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container">
    <div class="headband"></div>
    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner">
        <div class="site-brand-container">
          <div class="site-nav-toggle">
            <div class="toggle" aria-label="切换导航栏">
              <span class="toggle-line toggle-line-first"></span>
              <span class="toggle-line toggle-line-middle"></span>
              <span class="toggle-line toggle-line-last"></span>
            </div>
          </div>
          <div class="site-meta">
            <a href="/" class="brand" rel="start">
              <span class="logo-line-before"><i></i></span>
              <h1 class="site-title">静觅 <span class="site-subtitle"> 崔庆才的个人站点 </span>
              </h1>
              <span class="logo-line-after"><i></i></span>
            </a>
          </div>
          <div class="site-nav-right">
            <div class="toggle popup-trigger">
              <i class="fa fa-search fa-fw fa-lg"></i>
            </div>
          </div>
        </div>
        <nav class="site-nav">
          <ul id="menu" class="main-menu menu">
            <li class="menu-item menu-item-home">
              <a href="/" rel="section">首页</a>
            </li>
            <li class="menu-item menu-item-archives">
              <a href="/archives/" rel="section">文章列表</a>
            </li>
            <li class="menu-item menu-item-tags">
              <a href="/tags/" rel="section">文章标签</a>
            </li>
            <li class="menu-item menu-item-categories">
              <a href="/categories/" rel="section">文章分类</a>
            </li>
            <li class="menu-item menu-item-about">
              <a href="/about/" rel="section">关于博主</a>
            </li>
            <li class="menu-item menu-item-message">
              <a href="/message/" rel="section">给我留言</a>
            </li>
            <li class="menu-item menu-item-search">
              <a role="button" class="popup-trigger">搜索 </a>
            </li>
          </ul>
        </nav>
        <div class="search-pop-overlay">
          <div class="popup search-popup">
            <div class="search-header">
              <span class="search-icon">
                <i class="fa fa-search"></i>
              </span>
              <div class="search-input-container">
                <!-- A placeholder is not a dependable label; aria-label gives the search field an accessible name. -->
                <input autocomplete="off" autocapitalize="off" placeholder="搜索..." spellcheck="false" type="search" class="search-input" aria-label="搜索">
              </div>
              <span class="popup-btn-close">
                <i class="fa fa-times-circle"></i>
              </span>
            </div>
            <div id="search-result">
              <div id="no-result">
                <i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>
              </div>
            </div>
          </div>
        </div>
      </div>
    </header>
    <div class="back-to-top">
      <i class="fa fa-arrow-up"></i>
      <span>0%</span>
    </div>
    <div class="reading-progress-bar"></div>
    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div class="content post posts-expand">
            <article itemscope itemtype="http://schema.org/Article" class="post-block single" lang="zh-CN">
              <link itemprop="mainEntityOfPage" href="https://cuiqingcai.com/5844.html">
              <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
                <meta itemprop="image" content="/images/avatar.png">
                <meta itemprop="name" content="崔庆才">
                <meta itemprop="description" content="崔庆才的个人站点，记录生活的瞬间，分享学习的心得。">
              </span>
              <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
                <meta itemprop="name" content="静觅">
              </span>
              <header class="post-header">
                <h1 class="post-title" itemprop="name headline"> 中文分词原理及工具 </h1>
                <div class="post-meta">
                  <span class="post-meta-item">
                    <span class="post-meta-item-icon">
                      <i class="far fa-user"></i>
                    </span>
                    <span class="post-meta-item-text">作者</span>
                    <span><a href="/authors/崔庆才" class="author" itemprop="url" rel="index">崔庆才</a></span>
                  </span>
                  <span class="post-meta-item">
                    <span class="post-meta-item-icon">
                      <i class="far fa-calendar"></i>
                    </span>
                    <span class="post-meta-item-text">发表于</span>
                    <time title="创建时间：2018-03-15 21:32:47" itemprop="dateCreated datePublished" datetime="2018-03-15T21:32:47+08:00">2018-03-15</time>
                  </span>
                  <span class="post-meta-item">
                    <span class="post-meta-item-icon">
                      <i class="far fa-folder"></i>
                    </span>
                    <span class="post-meta-item-text">分类于</span>
                    <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                      <a href="/categories/Python/" itemprop="url" rel="index"><span itemprop="name">Python</span></a>
                    </span>
                  </span>
                  <span id="/5844.html" class="post-meta-item leancloud_visitors" data-flag-title="中文分词原理及工具" title="阅读次数">
                    <span class="post-meta-item-icon">
                      <i class="fa fa-eye"></i>
                    </span>
                    <span class="post-meta-item-text">阅读次数：</span>
                    <span class="leancloud-visitors-count"></span>
                  </span>
                  <span class="post-meta-item" title="本文字数">
                    <span class="post-meta-item-icon">
                      <i class="far fa-file-word"></i>
                    </span>
                    <span class="post-meta-item-text">本文字数：</span>
                    <span>11k</span>
                  </span>
                  <span class="post-meta-item" title="阅读时长">
                    <span class="post-meta-item-icon">
                      <i class="far fa-clock"></i>
                    </span>
                    <span class="post-meta-item-text">阅读时长 &asymp;</span>
                    <span>10 分钟</span>
                  </span>
                </div>
              </header>
              <div class="post-body" itemprop="articleBody">
                <div class="advertisements">
                  <div class="item">
                    <!-- Advertisement banner. The image is the link's only content, so its alt text
                         is what names the link; rel="noopener" matches the other external
                         target="_blank" links in this post. -->
                    <a href="http://i0k.cn/4UUsd" target="_blank" rel="noopener">
                      <img src="https://qiniu.cuiqingcai.com/dsdhf.jpg" alt="广告">
                    </a>
                  </div>
                </div>
                <h2 id="原理"><a href="#原理" class="headerlink" title="原理"></a>原理</h2>
                <p>中文分词，即 Chinese Word Segmentation，即将一个汉字序列进行切分，得到一个个单独的词。表面上看，分词其实就是那么回事，但分词效果好不好对信息检索、实验结果还是有很大影响的，同时分词的背后其实是涉及各种各样的算法的。 中文分词与英文分词有很大的不同，对英文而言，一个单词就是一个词，而汉语是以字为基本的书写单位，词语之间没有明显的区分标记，需要人为切分。根据其特点，可以把分词算法分为四大类：</p>
                <ul>
                  <li>基于规则的分词方法</li>
                  <li>基于统计的分词方法</li>
                  <li>基于语义的分词方法</li>
                  <li>基于理解的分词方法</li>
                </ul>
                <p>下面我们对这几种方法分别进行总结。</p>
                <h3 id="基于规则的分词方法"><a href="#基于规则的分词方法" class="headerlink" title="基于规则的分词方法"></a>基于规则的分词方法</h3>
                <p>这种方法又叫作机械分词方法、基于字典的分词方法，它是按照一定的策略将待分析的汉字串与一个“充分大的”机器词典中的词条进行匹配。若在词典中找到某个字符串，则匹配成功。该方法有三个要素，即分词词典、文本扫描顺序和匹配原则。文本的扫描顺序有正向扫描、逆向扫描和双向扫描。匹配原则主要有最大匹配、最小匹配、逐词匹配和最佳匹配。</p>
                <ul>
                  <li>最大匹配法（MM）。基本思想是：假设自动分词词典中的最长词条所含汉字的个数为 i，则取被处理材料当前字符串序列中的前 i 个字符作为匹配字段，查找分词词典，若词典中有这样一个 i 字词，则匹配成功，匹配字段作为一个词被切分出来；若词典中找不到这样的一个 i 字词，则匹配失败，匹配字段去掉最后一个汉字，剩下的字符作为新的匹配字段，再进行匹配，如此进行下去，直到匹配成功为止。统计结果表明，该方法的错误率为 1/169。</li>
                  <li>逆向最大匹配法（RMM）。该方法的分词过程与 MM 法相同，不同的是从句子（或文章）末尾开始处理，每次匹配不成功时去掉的是前面的一个汉字。统计结果表明，该方法的错误率为 1/245。</li>
                  <li>逐词遍历法。把词典中的词按照由长到短递减的顺序逐字搜索整个待处理的材料，一直到把全部的词切分出来为止。不论分词词典多大，被处理的材料多么小，都得把这个分词词典匹配一遍。</li>
                  <li>设立切分标志法。切分标志有自然和非自然之分。自然切分标志是指文章中出现的非文字符号，如标点符号等；非自然标志是利用词缀和不构成词的词（包括单音词、复音节词以及象声词等）。设立切分标志法首先收集众多的切分标志，分词时先找出切分标志，把句子切分为一些较短的字段，再用 MM、RMM 或其它的方法进行细加工。这种方法并非真正意义上的分词方法，只是自动分词的一种前处理方式而已，它要额外消耗时间扫描切分标志，增加存储空间存放那些非自然切分标志。</li>
                  <li>最佳匹配法（OM）。此法分为正向的最佳匹配法和逆向的最佳匹配法，其出发点是：在词典中按词频的大小顺序排列词条，以求缩短对分词词典的检索时间，达到最佳效果，从而降低分词的时间复杂度，加快分词速度。实质上，这种方法也不是一种纯粹意义上的分词方法，它只是一种对分词词典的组织方式。OM 法的分词词典每条词的前面必须有指明长度的数据项，所以其空间复杂度有所增加，对提高分词精度没有影响，分词处理的时间复杂度有所降低。</li>
                </ul>
                <p>此种方法优点是简单，易于实现。但缺点有很多：匹配速度慢；存在交集型和组合型歧义切分问题；词本身没有一个标准的定义，没有统一标准的词集；不同词典产生的歧义也不同；缺乏自学习的智能性。</p>
                <h3 id="基于统计的分词方法"><a href="#基于统计的分词方法" class="headerlink" title="基于统计的分词方法"></a>基于统计的分词方法</h3>
                <p>该方法的主要思想：词是稳定的组合，因此在上下文中，相邻的字同时出现的次数越多，就越有可能构成一个词。因此字与字相邻出现的概率或频率能较好地反映成词的可信度。可以对训练文本中相邻出现的各个字的组合的频度进行统计，计算它们之间的互现信息。互现信息体现了汉字之间结合关系的紧密程度。当紧密程度高于某一个阈值时，便可以认为此字组可能构成了一个词。该方法又称为无字典分词。 该方法所应用的主要的统计模型有：N 元文法模型（N-gram）、隐马尔可夫模型（Hidden Markov Model，HMM）、最大熵模型（ME）、条件随机场模型（Conditional Random Fields，CRF）等。 在实际应用中此类分词算法一般是将其与基于词典的分词方法结合起来，既发挥匹配分词切分速度快、效率高的特点，又利用了无词典分词结合上下文识别生词、自动消除歧义的优点。</p>
                <h3 id="基于语义的分词方法"><a href="#基于语义的分词方法" class="headerlink" title="基于语义的分词方法"></a>基于语义的分词方法</h3>
                <p>语义分词法引入了语义分析，对自然语言自身的语言信息进行更多的处理，如扩充转移网络法、知识分词语义分析法、邻接约束法、综合匹配法、后缀分词法、特征词库法、矩阵约束法、语法分析法等。</p>
                <ul>
                  <li>扩充转移网络法。该方法以有限状态机概念为基础。有限状态机只能识别正则语言，对有限状态机作的第一次扩充使其具有递归能力，形成递归转移网络（RTN）。在 RTN 中，弧线上的标志不仅可以是终极符（语言中的单词）或非终极符（词类），还可以调用另外的子网络名字分非终极符（如字或字串的成词条件）。这样，计算机在运行某个子网络时，就可以调用另外的子网络，还可以递归调用。词法扩充转移网络的使用，使分词处理和语言理解的句法处理阶段交互成为可能，并且有效地解决了汉语分词的歧义。</li>
                  <li>矩阵约束法。其基本思想是：先建立一个语法约束矩阵和一个语义约束矩阵， 其中元素分别表明具有某词性的词和具有另一词性的词相邻是否符合语法规则， 属于某语义类的词和属于另一词义类的词相邻是否符合逻辑，机器在切分时以之约束分词结果。</li>
                </ul>
                <h3 id="基于理解的分词方法"><a href="#基于理解的分词方法" class="headerlink" title="基于理解的分词方法"></a>基于理解的分词方法</h3>
                <p>基于理解的分词方法是通过让计算机模拟人对句子的理解，达到识别词的效果。其基本思想就是在分词的同时进行句法、语义分析，利用句法信息和语义信息来处理歧义现象。它通常包括三个部分：分词子系统、句法语义子系统、总控部分。在总控部分的协调下，分词子系统可以获得有关词、句子等的句法和语义信息来对分词歧义进行判断，即它模拟了人对句子的理解过程。这种分词方法需要使用大量的语言知识和信息。目前基于理解的分词方法主要有专家系统分词法和神经网络分词法等。</p>
                <ul>
                  <li>专家系统分词法。从专家系统角度把分词的知识（包括常识性分词知识与消除歧义切分的启发性知识即歧义切分规则）从实现分词过程的推理机中独立出来，使知识库的维护与推理机的实现互不干扰，从而使知识库易于维护和管理。它还具有发现交集歧义字段和多义组合歧义字段的能力和一定的自学习功能。</li>
                  <li>神经网络分词法。该方法是模拟人脑并行，分布处理和建立数值计算模型工作的。它将分词知识所分散隐式的方法存入神经网络内部，通过自学习和训练修改内部权值，以达到正确的分词结果，最后给出神经网络自动分词结果，如使用 LSTM、GRU 等神经网络模型等。</li>
                  <li>神经网络专家系统集成式分词法。该方法首先启动神经网络进行分词，当神经网络对新出现的词不能给出准确切分时，激活专家系统进行分析判断，依据知识库进行推理，得出初步分析，并启动学习机制对神经网络进行训练。该方法可以较充分发挥神经网络与专家系统二者优势，进一步提高分词效率。</li>
                </ul>
                <p>以上便是对分词算法的基本介绍，接下来我们再介绍几个比较实用的分词 Python 库及它们的使用方法。</p>
                <h2 id="分词工具"><a href="#分词工具" class="headerlink" title="分词工具"></a>分词工具</h2>
                <p>在这里介绍几个比较有代表性的支持分词的 Python 库，主要有：</p>
                <h3 id="1-jieba"><a href="#1-jieba" class="headerlink" title="1. jieba"></a>1. jieba</h3>
                <p>专用于分词的 Python 库，GitHub：<a href="https://github.com/fxsjy/jieba" target="_blank" rel="noopener">https://github.com/fxsjy/jieba</a>，分词效果较好。 支持三种分词模式：</p>
                <ul>
                  <li>精确模式，试图将句子最精确地切开，适合文本分析。</li>
                  <li>全模式，将句子中所有的可能成词的词语都扫描出来，速度非常快，但是不能解决歧义。</li>
                  <li>搜索引擎模式：在精确模式的基础上，对长词再次切分，提高召回率，适用于搜索引擎分词。</li>
                </ul>
                <p>另外 jieba 支持繁体分词，支持自定义词典。 其使用的算法是基于统计的分词方法，主要有如下几种：</p>
                <ul>
                  <li>基于前缀词典实现高效的词图扫描，生成句子中汉字所有可能成词情况所构成的有向无环图 (DAG)</li>
                  <li>采用了动态规划查找最大概率路径, 找出基于词频的最大切分组合</li>
                  <li>对于未登录词，采用了基于汉字成词能力的 HMM 模型，使用了 Viterbi 算法</li>
                </ul>
                <h4 id="精确模式分词"><a href="#精确模式分词" class="headerlink" title="精确模式分词"></a>精确模式分词</h4>
                <p>首先我们来看下精确模式分词，使用 lcut() 方法，类似 cut() 方法，其参数和 cut() 是一致的，只不过返回结果是列表而不是生成器，默认使用精确模式，代码如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">import jieba</span><br><span class="line"><span class="keyword">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line"><span class="built_in">result</span> = jieba.lcut(<span class="keyword">string</span>)</span><br><span class="line">print(<span class="built_in">len</span>(<span class="built_in">result</span>), <span class="string">'/'</span>.join(<span class="built_in">result</span>))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>结果：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">38</span> 这个<span class="regexp">/把手/</span>该换<span class="regexp">/了/</span>，<span class="regexp">/我/</span>不<span class="regexp">/喜欢/</span>日本<span class="regexp">/和服/</span>，<span class="regexp">/别/</span>把手<span class="regexp">/放在/</span>我<span class="regexp">/的/</span>肩膀<span class="regexp">/上/</span>，<span class="regexp">/工信处/</span>女干事<span class="regexp">/每月/</span>经过<span class="regexp">/下属/</span>科室<span class="regexp">/都/</span>要<span class="regexp">/亲口/</span>交代<span class="regexp">/24/</span>口<span class="regexp">/交换机/</span>等<span class="regexp">/技术性/</span>器件<span class="regexp">/的/</span>安装<span class="regexp">/工作</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>可见分词效果还是不错的。</p>
                <h4 id="全模式分词"><a href="#全模式分词" class="headerlink" title="全模式分词"></a>全模式分词</h4>
                <p>使用全模式分词需要添加 cut_all 参数，将其设置为 True，代码如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="built_in">result</span> = jieba.lcut(<span class="keyword">string</span>, cut_all=True)</span><br><span class="line">print(<span class="built_in">len</span>(<span class="built_in">result</span>), <span class="string">'/'</span>.join(<span class="built_in">result</span>))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>结果如下：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">51</span> 这个<span class="regexp">/把手/</span>该换<span class="regexp">/了/</span><span class="regexp">//</span>我<span class="regexp">/不/</span>喜欢<span class="regexp">/日本/</span>和服<span class="regexp">//</span><span class="regexp">/别/</span>把手<span class="regexp">/放在/</span>我<span class="regexp">/的/</span>肩膀<span class="regexp">/上/</span><span class="regexp">//</span>工信处<span class="regexp">/处女/</span>女干事<span class="regexp">/干事/</span>每月<span class="regexp">/月经/</span>经过<span class="regexp">/下属/</span>科室<span class="regexp">/都/</span>要<span class="regexp">/亲口/</span>口交<span class="regexp">/交代/</span><span class="number">24</span><span class="regexp">/口交/</span>交换<span class="regexp">/交换机/</span>换机<span class="regexp">/等/</span>技术<span class="regexp">/技术性/</span>性器<span class="regexp">/器件/</span>的<span class="regexp">/安装/</span>安装工<span class="regexp">/装工/</span>工作</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <h4 id="搜索引擎模式分词"><a href="#搜索引擎模式分词" class="headerlink" title="搜索引擎模式分词"></a>搜索引擎模式分词</h4>
                <p>使用搜索引擎模式分词需要调用 cut_for_search() 方法，这里使用其返回列表的版本 lcut_for_search()，代码如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="built_in">result</span> = jieba.lcut_for_search(<span class="keyword">string</span>)</span><br><span class="line">print(<span class="built_in">len</span>(<span class="built_in">result</span>), <span class="string">'/'</span>.join(<span class="built_in">result</span>))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>结果如下：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">42</span> 这个<span class="regexp">/把手/</span>该换<span class="regexp">/了/</span>，<span class="regexp">/我/</span>不<span class="regexp">/喜欢/</span>日本<span class="regexp">/和服/</span>，<span class="regexp">/别/</span>把手<span class="regexp">/放在/</span>我<span class="regexp">/的/</span>肩膀<span class="regexp">/上/</span>，<span class="regexp">/工信处/</span>干事<span class="regexp">/女干事/</span>每月<span class="regexp">/经过/</span>下属<span class="regexp">/科室/</span>都<span class="regexp">/要/</span>亲口<span class="regexp">/交代/</span><span class="number">24</span><span class="regexp">/口/</span>交换<span class="regexp">/换机/</span>交换机<span class="regexp">/等/</span>技术<span class="regexp">/技术性/</span>器件<span class="regexp">/的/</span>安装<span class="regexp">/工作</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>另外可以加入自定义词典，如我们想把 日本和服 作为一个整体，可以把它添加到词典中，代码如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">jieba.add_word(<span class="string">'日本和服'</span>)</span><br><span class="line"><span class="built_in">result</span> = jieba.lcut(<span class="keyword">string</span>)</span><br><span class="line">print(<span class="built_in">len</span>(<span class="built_in">result</span>), <span class="string">'/'</span>.join(<span class="built_in">result</span>))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>结果如下：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">37</span> 这个<span class="regexp">/把手/</span>该换<span class="regexp">/了/</span>，<span class="regexp">/我/</span>不<span class="regexp">/喜欢/</span>日本和服<span class="regexp">/，/</span>别<span class="regexp">/把手/</span>放在<span class="regexp">/我/</span>的<span class="regexp">/肩膀/</span>上<span class="regexp">/，/</span>工信处<span class="regexp">/女干事/</span>每月<span class="regexp">/经过/</span>下属<span class="regexp">/科室/</span>都<span class="regexp">/要/</span>亲口<span class="regexp">/交代/</span><span class="number">24</span><span class="regexp">/口/</span>交换机<span class="regexp">/等/</span>技术性<span class="regexp">/器件/</span>的<span class="regexp">/安装/</span>工作</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>可以看到切分结果中，日本和服 四个字就作为一个整体出现在结果中了，分词数量比精确模式少了一个。</p>
                <h4 id="词性标注"><a href="#词性标注" class="headerlink" title="词性标注"></a>词性标注</h4>
                <p>另外 jieba 还支持词性标注，可以输出分词后每个词的词性（需要先通过 import jieba.posseg as pseg 导入词性标注模块），实例如下：</p>
                <figure class="highlight applescript">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="built_in">words</span> = pseg.lcut(<span class="built_in">string</span>)</span><br><span class="line">print(<span class="built_in">list</span>(map(lambda x: <span class="built_in">list</span>(x), <span class="built_in">words</span>)))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[[<span class="symbol">'这个</span>', <span class="symbol">'r</span>'], [<span class="symbol">'把手</span>', <span class="symbol">'v</span>'], [<span class="symbol">'该</span>', <span class="symbol">'r</span>'], [<span class="symbol">'换</span>', <span class="symbol">'v</span>'], [<span class="symbol">'了</span>', <span class="symbol">'ul</span>'], [<span class="symbol">'，</span>', <span class="symbol">'x</span>'], [<span class="symbol">'我</span>', <span class="symbol">'r</span>'], [<span class="symbol">'不</span>', <span class="symbol">'d</span>'], [<span class="symbol">'喜欢</span>', <span class="symbol">'v</span>'], [<span class="symbol">'日本和服</span>', <span class="symbol">'x</span>'], [<span class="symbol">'，</span>', <span class="symbol">'x</span>'], [<span class="symbol">'别</span>', <span class="symbol">'r</span>'], [<span class="symbol">'把手</span>', <span class="symbol">'v</span>'], [<span class="symbol">'放在</span>', <span class="symbol">'v</span>'], [<span class="symbol">'我</span>', <span class="symbol">'r</span>'], [<span class="symbol">'的</span>', <span class="symbol">'uj</span>'], [<span class="symbol">'肩膀</span>', <span class="symbol">'n</span>'], [<span class="symbol">'上</span>', <span class="symbol">'f</span>'], [<span class="symbol">'，</span>', <span class="symbol">'x</span>'], [<span class="symbol">'工信处</span>', <span class="symbol">'n</span>'], [<span class="symbol">'女干事</span>', <span class="symbol">'n</span>'], [<span class="symbol">'每月</span>', <span class="symbol">'r</span>'], [<span class="symbol">'经过</span>', <span class="symbol">'p</span>'], [<span class="symbol">'下属</span>', <span class="symbol">'v</span>'], [<span class="symbol">'科室</span>', <span class="symbol">'n</span>'], [<span class="symbol">'都</span>', <span class="symbol">'d</span>'], [<span class="symbol">'要</span>', <span class="symbol">'v</span>'], [<span class="symbol">'亲口</span>', <span class="symbol">'n</span>'], [<span 
class="symbol">'交代</span>', <span class="symbol">'n</span>'], [<span class="symbol">'24</span>', <span class="symbol">'m</span>'], [<span class="symbol">'口</span>', <span class="symbol">'n</span>'], [<span class="symbol">'交换机</span>', <span class="symbol">'n</span>'], [<span class="symbol">'等</span>', <span class="symbol">'u</span>'], [<span class="symbol">'技术性</span>', <span class="symbol">'n</span>'], [<span class="symbol">'器件</span>', <span class="symbol">'n</span>'], [<span class="symbol">'的</span>', <span class="symbol">'uj</span>'], [<span class="symbol">'安装</span>', <span class="symbol">'v</span>'], [<span class="symbol">'工作</span>', <span class="symbol">'vn</span>']]</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>关于词性的说明可以参考：<a href="https://gist.github.com/luw2007/6016931" target="_blank" rel="noopener">https://gist.github.com/luw2007/6016931</a>。</p>
                <h3 id="2-SnowNLP"><a href="#2-SnowNLP" class="headerlink" title="2. SnowNLP"></a>2. SnowNLP</h3>
                <p>SnowNLP: Simplified Chinese Text Processing，可以方便地处理中文文本内容，是受到了 TextBlob 的启发而写的，由于现在大部分的自然语言处理库基本都是针对英文的，于是作者写了一个方便处理中文的类库，并且和 TextBlob 不同的是，这里没有用 NLTK，所有的算法都是自己实现的，并且自带了一些训练好的字典。GitHub 地址：<a href="https://github.com/isnowfy/snownlp" target="_blank" rel="noopener">https://github.com/isnowfy/snownlp</a>。</p>
                <h4 id="分词"><a href="#分词" class="headerlink" title="分词"></a>分词</h4>
                <p>这里的分词是基于 Character-Based Generative Model 来实现的，论文地址：<a href="http://aclweb.org/anthology//Y/Y09/Y09-2047.pdf" target="_blank" rel="noopener">http://aclweb.org/anthology//Y/Y09/Y09-2047.pdf</a>，我们还是以上面的例子说明，相关使用说明如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="built_in">from</span> snownlp import SnowNLP</span><br><span class="line"></span><br><span class="line"><span class="keyword">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line">s = SnowNLP(<span class="keyword">string</span>)</span><br><span class="line"><span class="built_in">result</span> = s.<span class="keyword">words</span></span><br><span class="line">print(<span class="built_in">len</span>(<span class="built_in">result</span>), <span class="string">'/'</span>.join(<span class="built_in">result</span>))</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">40</span> 这个<span class="regexp">/把手/</span>该<span class="regexp">/换/</span>了<span class="regexp">/，/</span>我<span class="regexp">/不/</span>喜欢<span class="regexp">/日本/</span>和<span class="regexp">/服/</span>，<span class="regexp">/别把手/</span>放在<span class="regexp">/我/</span>的<span class="regexp">/肩膀/</span>上<span class="regexp">/，/</span>工<span class="regexp">/信处女/</span>干事<span class="regexp">/每月/</span>经过<span class="regexp">/下属/</span>科室<span class="regexp">/都/</span>要<span class="regexp">/亲口/</span>交代<span class="regexp">/24/</span>口<span class="regexp">/交换机/</span>等<span class="regexp">/技术性/</span>器件<span class="regexp">/的/</span>安装<span class="regexp">/工作</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>经过观察，可以发现分词效果其实不怎么理想，和服 被分开了，工信处 也被分开了，女干事 也被分开了。 另外 SnowNLP 还支持很多功能，例如词性标注（HMM）、情感分析、拼音转换（Trie树）、关键词和摘要生成（TextRank）。 我们简单看一个实例：</p>
                <figure class="highlight stylus">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="function"><span class="title">print</span><span class="params">(<span class="string">'Tags:'</span>, list(s.tags)</span></span>)</span><br><span class="line"><span class="function"><span class="title">print</span><span class="params">(<span class="string">'Sentiments:'</span>, s.sentiments)</span></span></span><br><span class="line"><span class="function"><span class="title">print</span><span class="params">(<span class="string">'Pinyin:'</span>, s.pinyin)</span></span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight css">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="selector-tag">Tags</span>: <span class="selector-attr">[(<span class="string">'这个'</span>, <span class="string">'r'</span>), (<span class="string">'把手'</span>, <span class="string">'Ng'</span>), (<span class="string">'该'</span>, <span class="string">'r'</span>), (<span class="string">'换'</span>, <span class="string">'v'</span>), (<span class="string">'了'</span>, <span class="string">'y'</span>), (<span class="string">'，'</span>, <span class="string">'w'</span>), (<span class="string">'我'</span>, <span class="string">'r'</span>), (<span class="string">'不'</span>, <span class="string">'d'</span>), (<span class="string">'喜欢'</span>, <span class="string">'v'</span>), (<span class="string">'日本'</span>, <span class="string">'ns'</span>), (<span class="string">'和'</span>, <span class="string">'c'</span>), (<span class="string">'服'</span>, <span class="string">'v'</span>), (<span class="string">'，'</span>, <span class="string">'w'</span>), (<span class="string">'别把手'</span>, <span class="string">'ad'</span>), (<span class="string">'放在'</span>, <span class="string">'v'</span>), (<span class="string">'我'</span>, <span class="string">'r'</span>), (<span class="string">'的'</span>, <span class="string">'u'</span>), (<span class="string">'肩膀'</span>, <span class="string">'n'</span>), (<span class="string">'上'</span>, <span class="string">'f'</span>), (<span class="string">'，'</span>, <span class="string">'w'</span>), (<span class="string">'工'</span>, <span class="string">'j'</span>), (<span class="string">'信处女'</span>, <span class="string">'j'</span>), (<span class="string">'干事'</span>, <span class="string">'n'</span>), (<span class="string">'每月'</span>, <span class="string">'r'</span>), (<span class="string">'经过'</span>, <span class="string">'p'</span>), (<span class="string">'下属'</span>, <span class="string">'v'</span>), (<span class="string">'科室'</span>, <span class="string">'n'</span>), (<span 
class="string">'都'</span>, <span class="string">'d'</span>), (<span class="string">'要'</span>, <span class="string">'v'</span>), (<span class="string">'亲口'</span>, <span class="string">'d'</span>), (<span class="string">'交代'</span>, <span class="string">'v'</span>), (<span class="string">'24'</span>, <span class="string">'m'</span>), (<span class="string">'口'</span>, <span class="string">'q'</span>), (<span class="string">'交换机'</span>, <span class="string">'n'</span>), (<span class="string">'等'</span>, <span class="string">'u'</span>), (<span class="string">'技术性'</span>, <span class="string">'n'</span>), (<span class="string">'器件'</span>, <span class="string">'n'</span>), (<span class="string">'的'</span>, <span class="string">'u'</span>), (<span class="string">'安装'</span>, <span class="string">'vn'</span>), (<span class="string">'工作'</span>, <span class="string">'vn'</span>)]</span></span><br><span class="line"><span class="selector-tag">Sentiments</span>: 0<span class="selector-class">.015678817603646866</span></span><br><span class="line"><span class="selector-tag">Pinyin</span>: <span class="selector-attr">[<span class="string">'zhe'</span>, <span class="string">'ge'</span>, <span class="string">'ba'</span>, <span class="string">'shou'</span>, <span class="string">'gai'</span>, <span class="string">'huan'</span>, <span class="string">'liao'</span>, <span class="string">'，'</span>, <span class="string">'wo'</span>, <span class="string">'bu'</span>, <span class="string">'xi'</span>, <span class="string">'huan'</span>, <span class="string">'ri'</span>, <span class="string">'ben'</span>, <span class="string">'he'</span>, <span class="string">'fu'</span>, <span class="string">'，'</span>, <span class="string">'bie'</span>, <span class="string">'ba'</span>, <span class="string">'shou'</span>, <span class="string">'fang'</span>, <span class="string">'zai'</span>, <span class="string">'wo'</span>, <span class="string">'de'</span>, <span class="string">'jian'</span>, 
<span class="string">'bang'</span>, <span class="string">'shang'</span>, <span class="string">'，'</span>, <span class="string">'gong'</span>, <span class="string">'xin'</span>, <span class="string">'chu'</span>, <span class="string">'nv'</span>, <span class="string">'gan'</span>, <span class="string">'shi'</span>, <span class="string">'mei'</span>, <span class="string">'yue'</span>, <span class="string">'jing'</span>, <span class="string">'guo'</span>, <span class="string">'xia'</span>, <span class="string">'shu'</span>, <span class="string">'ke'</span>, <span class="string">'shi'</span>, <span class="string">'dou'</span>, <span class="string">'yao'</span>, <span class="string">'qin'</span>, <span class="string">'kou'</span>, <span class="string">'jiao'</span>, <span class="string">'dai'</span>, <span class="string">'24'</span>, <span class="string">'kou'</span>, <span class="string">'jiao'</span>, <span class="string">'huan'</span>, <span class="string">'ji'</span>, <span class="string">'deng'</span>, <span class="string">'ji'</span>, <span class="string">'shu'</span>, <span class="string">'xing'</span>, <span class="string">'qi'</span>, <span class="string">'jian'</span>, <span class="string">'de'</span>, <span class="string">'an'</span>, <span class="string">'zhuang'</span>, <span class="string">'gong'</span>, <span class="string">'zuo'</span>]</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <h3 id="3-THULAC"><a href="#3-THULAC" class="headerlink" title="3. THULAC"></a>3. THULAC</h3>
                <p>THULAC（THU Lexical Analyzer for Chinese）是由清华大学自然语言处理与社会人文计算实验室研制推出的一套中文词法分析工具包，GitHub 链接：<a href="https://github.com/thunlp/THULAC-Python" target="_blank" rel="noopener">https://github.com/thunlp/THULAC-Python</a>，具有中文分词和词性标注功能。THULAC 具有如下几个特点：</p>
                <ul>
                  <li>能力强。利用集成的目前世界上规模最大的人工分词和词性标注中文语料库（约含5800万字）训练而成，模型标注能力强大。</li>
                  <li>准确率高。该工具包在标准数据集Chinese Treebank（CTB5）上分词的F1值可达97.3％，词性标注的F1值可达到92.9％，与该数据集上最好方法效果相当。</li>
                  <li>速度较快。同时进行分词和词性标注速度为300KB/s，每秒可处理约15万字。只进行分词速度可达到1.3MB/s。</li>
                </ul>
                <p>我们用一个实例看一下分词效果：</p>
                <figure class="highlight go">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="keyword">import</span> thulac</span><br><span class="line"></span><br><span class="line"><span class="keyword">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line">t = thulac.thulac()</span><br><span class="line">result = t.cut(<span class="keyword">string</span>)</span><br><span class="line"><span class="built_in">print</span>(result)</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[[<span class="symbol">'这个</span>', <span class="symbol">'r</span>'], [<span class="symbol">'把手</span>', <span class="symbol">'n</span>'], [<span class="symbol">'该</span>', <span class="symbol">'v</span>'], [<span class="symbol">'换</span>', <span class="symbol">'v</span>'], [<span class="symbol">'了</span>', <span class="symbol">'u</span>'], [<span class="symbol">'，</span>', <span class="symbol">'w</span>'], [<span class="symbol">'我</span>', <span class="symbol">'r</span>'], [<span class="symbol">'不</span>', <span class="symbol">'d</span>'], [<span class="symbol">'喜欢</span>', <span class="symbol">'v</span>'], [<span class="symbol">'日本</span>', <span class="symbol">'ns</span>'], [<span class="symbol">'和服</span>', <span class="symbol">'n</span>'], [<span class="symbol">'，</span>', <span class="symbol">'w</span>'], [<span class="symbol">'别把手</span>', <span class="symbol">'n</span>'], [<span class="symbol">'放</span>', <span class="symbol">'v</span>'], [<span class="symbol">'在</span>', <span class="symbol">'p</span>'], [<span class="symbol">'我</span>', <span class="symbol">'r</span>'], [<span class="symbol">'的</span>', <span class="symbol">'u</span>'], [<span class="symbol">'肩膀</span>', <span class="symbol">'n</span>'], [<span class="symbol">'上</span>', <span class="symbol">'f</span>'], [<span class="symbol">'，</span>', <span class="symbol">'w</span>'], [<span class="symbol">'工信处</span>', <span class="symbol">'n</span>'], [<span class="symbol">'女</span>', <span class="symbol">'a</span>'], [<span class="symbol">'干事</span>', <span class="symbol">'n</span>'], [<span class="symbol">'每月</span>', <span class="symbol">'r</span>'], [<span class="symbol">'经过</span>', <span class="symbol">'p</span>'], [<span class="symbol">'下属</span>', <span class="symbol">'v</span>'], [<span class="symbol">'科室</span>', <span class="symbol">'n</span>'], [<span class="symbol">'都</span>', <span class="symbol">'d</span>'], [<span 
class="symbol">'要</span>', <span class="symbol">'v</span>'], [<span class="symbol">'亲口</span>', <span class="symbol">'d</span>'], [<span class="symbol">'交代</span>', <span class="symbol">'v</span>'], [<span class="symbol">'24</span>', <span class="symbol">'m</span>'], [<span class="symbol">'口</span>', <span class="symbol">'q</span>'], [<span class="symbol">'交换机</span>', <span class="symbol">'n</span>'], [<span class="symbol">'等</span>', <span class="symbol">'u</span>'], [<span class="symbol">'技术性</span>', <span class="symbol">'n</span>'], [<span class="symbol">'器件</span>', <span class="symbol">'n</span>'], [<span class="symbol">'的</span>', <span class="symbol">'u</span>'], [<span class="symbol">'安装</span>', <span class="symbol">'v</span>'], [<span class="symbol">'工作</span>', <span class="symbol">'v</span>']]</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <h3 id="4-NLPIR"><a href="#4-NLPIR" class="headerlink" title="4. NLPIR"></a>4. NLPIR</h3>
                <p>NLPIR 分词系统，前身为2000年发布的 ICTCLAS 词法分析系统，GitHub 链接：<a href="https://github.com/NLPIR-team/NLPIR" target="_blank" rel="noopener">https://github.com/NLPIR-team/NLPIR</a>，是由北京理工大学张华平博士研发的中文分词系统，经过十余年的不断完善，拥有丰富的功能和强大的性能。NLPIR 是一整套对原始文本集进行处理和加工的软件，提供了中间件处理效果的可视化展示，也可以作为小规模数据的处理加工工具。主要功能包括：中文分词、词性标注、命名实体识别、用户词典、新词发现与关键词提取等。另外对于分词功能，它有 Python 实现的版本，GitHub 链接：<a href="https://github.com/tsroten/pynlpir" target="_blank" rel="noopener">https://github.com/tsroten/pynlpir</a>。 使用方法如下：</p>
                <figure class="highlight livecodeserver">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">import pynlpir</span><br><span class="line"></span><br><span class="line">pynlpir.<span class="built_in">open</span>()</span><br><span class="line"><span class="keyword">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line"><span class="built_in">result</span> = pynlpir.<span class="keyword">segment</span>(<span class="keyword">string</span>)</span><br><span class="line">print(<span class="built_in">result</span>)</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果如下：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[(<span class="symbol">'这个</span>', <span class="symbol">'pronoun</span>'), (<span class="symbol">'把</span>', <span class="symbol">'preposition</span>'), (<span class="symbol">'手</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'该</span>', <span class="symbol">'pronoun</span>'), (<span class="symbol">'换</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'了</span>', <span class="symbol">'modal</span> particle'), (<span class="symbol">'，</span>', <span class="symbol">'punctuation</span> mark'), (<span class="symbol">'我</span>', <span class="symbol">'pronoun</span>'), (<span class="symbol">'不</span>', <span class="symbol">'adverb</span>'), (<span class="symbol">'喜欢</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'日本</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'和</span>', <span class="symbol">'conjunction</span>'), (<span class="symbol">'服</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'，</span>', <span class="symbol">'punctuation</span> mark'), (<span class="symbol">'别</span>', <span class="symbol">'adverb</span>'), (<span class="symbol">'把</span>', <span class="symbol">'preposition</span>'), (<span class="symbol">'手</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'放</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'在</span>', <span class="symbol">'preposition</span>'), (<span class="symbol">'我</span>', <span class="symbol">'pronoun</span>'), (<span class="symbol">'的</span>', <span class="symbol">'particle</span>'), (<span class="symbol">'肩膀</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'上</span>', <span class="symbol">'noun</span> of locality'), (<span class="symbol">'，</span>', <span class="symbol">'punctuation</span> mark'), (<span class="symbol">'工</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'信</span>', <span 
class="symbol">'noun</span>'), (<span class="symbol">'处女</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'干事</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'每月</span>', <span class="symbol">'pronoun</span>'), (<span class="symbol">'经过</span>', <span class="symbol">'preposition</span>'), (<span class="symbol">'下属</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'科室</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'都</span>', <span class="symbol">'adverb</span>'), (<span class="symbol">'要</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'亲口</span>', <span class="symbol">'adverb</span>'), (<span class="symbol">'交代</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'24</span>', <span class="symbol">'numeral</span>'), (<span class="symbol">'口</span>', <span class="symbol">'classifier</span>'), (<span class="symbol">'交换机</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'等</span>', <span class="symbol">'particle</span>'), (<span class="symbol">'技术性</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'器件</span>', <span class="symbol">'noun</span>'), (<span class="symbol">'的</span>', <span class="symbol">'particle</span>'), (<span class="symbol">'安装</span>', <span class="symbol">'verb</span>'), (<span class="symbol">'工作</span>', <span class="symbol">'verb</span>')]</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>这里 把手 和 和服 也被分开了。</p>
                <h3 id="5-NLTK"><a href="#5-NLTK" class="headerlink" title="5. NLTK"></a>5. NLTK</h3>
                <p>NLTK，Natural Language Toolkit，是一个自然语言处理的工具包，提供了多种 NLP 处理相关功能，GitHub 链接：<a href="https://github.com/nltk/nltk" target="_blank" rel="noopener">https://github.com/nltk/nltk</a>。 但是 NLTK 对于中文分词是不支持的，示例如下：</p>
                <figure class="highlight isbl">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="variable">from</span> <span class="variable">nltk</span> <span class="variable">import</span> <span class="variable">word_tokenize</span></span><br><span class="line"></span><br><span class="line"><span class="variable">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line"><span class="variable"><span class="class">result</span></span> = <span class="function"><span class="title">word_tokenize</span>(<span class="variable">string</span>)</span></span><br><span class="line"><span class="function"><span class="title">print</span>(<span class="variable"><span class="class">result</span></span>)</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>结果：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[<span class="symbol">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作</span>']</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>如果要用中文分词的话，可以使用 FoolNLTK，它使用 Bi-LSTM 训练而成，包含分词、词性标注、实体识别等功能，同时支持自定义词典，可以训练自己的模型，可以进行批量处理。 使用方法如下：</p>
                <figure class="highlight go">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="keyword">import</span> fool</span><br><span class="line"></span><br><span class="line"><span class="keyword">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line">result = fool.cut(<span class="keyword">string</span>)</span><br><span class="line"><span class="built_in">print</span>(result)</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[[<span class="symbol">'这个</span>', <span class="symbol">'把手</span>', <span class="symbol">'该</span>', <span class="symbol">'换</span>', <span class="symbol">'了</span>', <span class="symbol">'，</span>', <span class="symbol">'我</span>', <span class="symbol">'不</span>', <span class="symbol">'喜欢</span>', <span class="symbol">'日本</span>', <span class="symbol">'和服</span>', <span class="symbol">'，</span>', <span class="symbol">'别</span>', <span class="symbol">'把</span>', <span class="symbol">'手</span>', <span class="symbol">'放</span>', <span class="symbol">'在</span>', <span class="symbol">'我</span>', <span class="symbol">'的</span>', <span class="symbol">'肩膀</span>', <span class="symbol">'上</span>', <span class="symbol">'，</span>', <span class="symbol">'工信处</span>', <span class="symbol">'女</span>', <span class="symbol">'干事</span>', <span class="symbol">'每月</span>', <span class="symbol">'经过</span>', <span class="symbol">'下属</span>', <span class="symbol">'科室</span>', <span class="symbol">'都</span>', <span class="symbol">'要</span>', <span class="symbol">'亲</span>', <span class="symbol">'口</span>', <span class="symbol">'交代</span>', <span class="symbol">'24</span>', <span class="symbol">'口</span>', <span class="symbol">'交换机</span>', <span class="symbol">'等</span>', <span class="symbol">'技术性</span>', <span class="symbol">'器件</span>', <span class="symbol">'的</span>', <span class="symbol">'安装</span>', <span class="symbol">'工作</span>']]</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>可以看到这个分词效果还是不错的。 另外还可以进行词性标注，实体识别：</p>
                <figure class="highlight gauss">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">result = fool.pos_cut(<span class="keyword">string</span>)</span><br><span class="line"><span class="keyword">print</span>(result)</span><br><span class="line">_, ners = fool.analysis(<span class="keyword">string</span>)</span><br><span class="line"><span class="keyword">print</span>(ners)</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight scheme">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line">[[(<span class="symbol">'这个</span>', <span class="symbol">'r</span>'), (<span class="symbol">'把手</span>', <span class="symbol">'n</span>'), (<span class="symbol">'该</span>', <span class="symbol">'r</span>'), (<span class="symbol">'换</span>', <span class="symbol">'v</span>'), (<span class="symbol">'了</span>', <span class="symbol">'y</span>'), (<span class="symbol">'，</span>', <span class="symbol">'wd</span>'), (<span class="symbol">'我</span>', <span class="symbol">'r</span>'), (<span class="symbol">'不</span>', <span class="symbol">'d</span>'), (<span class="symbol">'喜欢</span>', <span class="symbol">'vi</span>'), (<span class="symbol">'日本</span>', <span class="symbol">'ns</span>'), (<span class="symbol">'和服</span>', <span class="symbol">'n</span>'), (<span class="symbol">'，</span>', <span class="symbol">'wd</span>'), (<span class="symbol">'别</span>', <span class="symbol">'d</span>'), (<span class="symbol">'把</span>', <span class="symbol">'pba</span>'), (<span class="symbol">'手</span>', <span class="symbol">'n</span>'), (<span class="symbol">'放</span>', <span class="symbol">'v</span>'), (<span class="symbol">'在</span>', <span class="symbol">'p</span>'), (<span class="symbol">'我</span>', <span class="symbol">'r</span>'), (<span class="symbol">'的</span>', <span class="symbol">'ude</span>'), (<span class="symbol">'肩膀</span>', <span class="symbol">'n</span>'), (<span class="symbol">'上</span>', <span class="symbol">'f</span>'), (<span class="symbol">'，</span>', <span class="symbol">'wd</span>'), (<span class="symbol">'工信处</span>', <span class="symbol">'ns</span>'), (<span class="symbol">'女</span>', <span class="symbol">'b</span>'), (<span class="symbol">'干事</span>', <span class="symbol">'n</span>'), (<span class="symbol">'每月</span>', <span class="symbol">'r</span>'), (<span class="symbol">'经过</span>', <span class="symbol">'p</span>'), (<span class="symbol">'下属</span>', <span class="symbol">'v</span>'), (<span 
class="symbol">'科室</span>', <span class="symbol">'n</span>'), (<span class="symbol">'都</span>', <span class="symbol">'d</span>'), (<span class="symbol">'要</span>', <span class="symbol">'v</span>'), (<span class="symbol">'亲</span>', <span class="symbol">'a</span>'), (<span class="symbol">'口</span>', <span class="symbol">'n</span>'), (<span class="symbol">'交代</span>', <span class="symbol">'v</span>'), (<span class="symbol">'24</span>', <span class="symbol">'m</span>'), (<span class="symbol">'口</span>', <span class="symbol">'q</span>'), (<span class="symbol">'交换机</span>', <span class="symbol">'n</span>'), (<span class="symbol">'等</span>', <span class="symbol">'udeng</span>'), (<span class="symbol">'技术性</span>', <span class="symbol">'n</span>'), (<span class="symbol">'器件</span>', <span class="symbol">'n</span>'), (<span class="symbol">'的</span>', <span class="symbol">'ude</span>'), (<span class="symbol">'安装</span>', <span class="symbol">'n</span>'), (<span class="symbol">'工作</span>', <span class="symbol">'n</span>')]]</span><br><span class="line">[[(<span class="name">12</span>, <span class="number">15</span>, <span class="symbol">'location</span>', <span class="symbol">'日本</span>')]]</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <h3 id="6-LTP"><a href="#6-LTP" class="headerlink" title="6. LTP"></a>6. LTP</h3>
                <p>语言技术平台（Language Technology Platform，LTP）是哈工大社会计算与信息检索研究中心历时十年开发的一整套中文语言处理系统。LTP制定了基于XML的语言处理结果表示，并在此基础上提供了一整套自底向上的丰富而且高效的中文语言处理模块（包括词法、句法、语义等6项中文处理核心技术），以及基于动态链接库（Dynamic Link Library, DLL）的应用程序接口、可视化工具，并且能够以网络服务（Web Service）的形式进行使用。 LTP 有 Python 版本，GitHub地址：<a href="https://github.com/HIT-SCIR/pyltp" target="_blank" rel="noopener">https://github.com/HIT-SCIR/pyltp</a>，另外运行的时候需要下载模型，模型还比较大，下载地址：<a href="http://ltp.ai/download.html" target="_blank" rel="noopener">http://ltp.ai/download.html</a>。 示例代码如下：</p>
                <figure class="highlight isbl">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="variable">from</span> <span class="variable">pyltp</span> <span class="variable">import</span> <span class="variable">Segmentor</span></span><br><span class="line"></span><br><span class="line"><span class="variable">string</span> = <span class="string">'这个把手该换了，我不喜欢日本和服，别把手放在我的肩膀上，工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作'</span></span><br><span class="line"><span class="variable">segmentor</span> = <span class="function"><span class="title">Segmentor</span>()</span></span><br><span class="line"><span class="variable">segmentor.load</span>(<span class="string">'./cws.model'</span>)</span><br><span class="line"><span class="variable"><span class="class">result</span></span> = <span class="function"><span class="title">list</span>(<span class="variable">segmentor.segment</span>(<span class="variable">string</span>))</span></span><br><span class="line"><span class="variable">segmentor.release</span>()</span><br><span class="line"><span class="function"><span class="title">print</span>(<span class="function"><span class="title">len</span>(<span class="variable"><span class="class">result</span></span>)</span>, <span class="string">'/'</span>.join(<span class="variable"><span class="class">result</span></span>))</span></span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>运行结果：</p>
                <figure class="highlight awk">
                  <table>
                    <tr>
                      <td class="gutter">
                        <pre><span class="line">1</span><br></pre>
                      </td>
                      <td class="code">
                        <pre><span class="line"><span class="number">41</span> 这个<span class="regexp">/把手/</span>该<span class="regexp">/换/</span>了<span class="regexp">/，/</span>我<span class="regexp">/不/</span>喜欢<span class="regexp">/日本/</span>和服<span class="regexp">/，/</span>别<span class="regexp">/把/</span>手<span class="regexp">/放在/</span>我<span class="regexp">/的/</span>肩膀<span class="regexp">/上/</span>，<span class="regexp">/工信/</span>处女<span class="regexp">/干事/</span>每月<span class="regexp">/经过/</span>下属<span class="regexp">/科室/</span>都<span class="regexp">/要/</span>亲口<span class="regexp">/交代/</span><span class="number">24</span><span class="regexp">/口/</span>交换机<span class="regexp">/等/</span>技术性<span class="regexp">/器件/</span>的<span class="regexp">/安装/</span>工作</span><br></pre>
                      </td>
                    </tr>
                  </table>
                </figure>
                <p>可以发现 工信处、女干事 没有正确分开。 以上便是一些分词库的基本使用，个人比较推荐的有 jieba、THULAC、FoolNLTK。</p>
                <h2 id="参考来源"><a href="#参考来源" class="headerlink" title="参考来源"></a>参考来源</h2>
                <ul>
                  <li><a href="http://m635674608.iteye.com/blog/2298833" target="_blank" rel="noopener">http://m635674608.iteye.com/blog/2298833</a></li>
                  <li><a href="http://blog.csdn.net/flysky1991/article/details/73948971" target="_blank" rel="noopener">http://blog.csdn.net/flysky1991/article/details/73948971</a></li>
                </ul>
              </div>
              <div class="reward-container">
                <div></div>
                <!-- Donation toggle: shows or hides the QR-code panel (#qr) below.
                     type="button" keeps the button from acting as a submit control;
                     aria-expanded is kept in sync with the panel's visibility. -->
                <button type="button" aria-controls="qr" aria-expanded="false" onclick="var qr = document.getElementById('qr'); var show = qr.style.display === 'none'; qr.style.display = show ? 'block' : 'none'; this.setAttribute('aria-expanded', show ? 'true' : 'false');"> 打赏 </button>
                <div id="qr" style="display: none;">
                  <div style="display: inline-block;">
                    <img src="/images/wechatpay.jpg" alt="崔庆才 微信支付">
                    <p>微信支付</p>
                  </div>
                  <div style="display: inline-block;">
                    <img src="/images/alipay.jpg" alt="崔庆才 支付宝">
                    <p>支付宝</p>
                  </div>
                </div>
              </div>
              <footer class="post-footer">
                <div class="post-nav">
                  <div class="post-nav-item">
                    <a href="/5822.html" rel="prev" title="深度学习 GPU环境 Ubuntu 16.04 + Nvidia GTX 1080 + Python 3.6 + CUDA 9.0 + cuDNN 7.1 + TensorFlow 1.6 环境配置">
                      <i class="fa fa-chevron-left"></i> 深度学习 GPU环境 Ubuntu 16.04 + Nvidia GTX 1080 + Python 3.6 + CUDA 9.0 + cuDNN 7.1 + TensorFlow 1.6 环境配置 </a>
                  </div>
                  <div class="post-nav-item">
                    <a href="/5846.html" rel="next" title="Requests库作者另一神器Pipenv的用法"> Requests库作者另一神器Pipenv的用法 <i class="fa fa-chevron-right"></i>
                    </a>
                  </div>
                </div>
              </footer>
            </article>
          </div>
          <div class="comments" id="gitalk-container"></div>
          <script>
            // On 'tabs:register', activate the comment tab saved in localStorage
            // (when storage is enabled) or, failing that, the configured default.
            window.addEventListener('tabs:register', () => {
              const commentsConfig = CONFIG.comments;
              let preferredTab = commentsConfig.activeClass;
              if (commentsConfig.storage) {
                preferredTab = localStorage.getItem('comments_active') || preferredTab;
              }
              if (!preferredTab) return;
              const tabLink = document.querySelector(`a[href="#comment-${preferredTab}"]`);
              if (tabLink) {
                tabLink.click();
              }
            });
            // When storage is enabled, remember which comment tab pane was clicked.
            if (CONFIG.comments.storage) {
              window.addEventListener('tabs:click', event => {
                const pane = event.target;
                if (pane.matches('.tabs-comment .tab-content .tab-pane')) {
                  // The pane's second class is what gets stored and later used
                  // as the suffix of the "#comment-" link selector above.
                  localStorage.setItem('comments_active', pane.classList[1]);
                }
              });
            }

          </script>
        </div>
        <div class="toggle sidebar-toggle">
          <span class="toggle-line toggle-line-first"></span>
          <span class="toggle-line toggle-line-middle"></span>
          <span class="toggle-line toggle-line-last"></span>
        </div>
        <aside class="sidebar">
          <div class="sidebar-inner">
            <ul class="sidebar-nav motion-element">
              <li class="sidebar-nav-toc"> 文章目录 </li>
              <li class="sidebar-nav-overview"> 站点概览 </li>
            </ul>
            <!--noindex-->
            <div class="post-toc-wrap sidebar-panel">
              <div class="post-toc motion-element">
                <ol class="nav">
                  <li class="nav-item nav-level-2"><a class="nav-link" href="#原理"><span class="nav-number">1.</span> <span class="nav-text">原理</span></a>
                    <ol class="nav-child">
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#基于规则的分词方法"><span class="nav-number">1.1.</span> <span class="nav-text">基于规则的分词方法</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#基于统计的分词方法"><span class="nav-number">1.2.</span> <span class="nav-text">基于统计的分词方法</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#基于语义的分词方法"><span class="nav-number">1.3.</span> <span class="nav-text">基于语义的分词方法</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#基于理解的分词方法"><span class="nav-number">1.4.</span> <span class="nav-text">基于理解的分词方法</span></a></li>
                    </ol>
                  </li>
                  <li class="nav-item nav-level-2"><a class="nav-link" href="#分词工具"><span class="nav-number">2.</span> <span class="nav-text">分词工具</span></a>
                    <ol class="nav-child">
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#1-jieba"><span class="nav-number">2.1.</span> <span class="nav-text">1. jieba</span></a>
                        <ol class="nav-child">
                          <li class="nav-item nav-level-4"><a class="nav-link" href="#精确模式分词"><span class="nav-number">2.1.1.</span> <span class="nav-text">精确模式分词</span></a></li>
                          <li class="nav-item nav-level-4"><a class="nav-link" href="#全模式分词"><span class="nav-number">2.1.2.</span> <span class="nav-text">全模式分词</span></a></li>
                          <li class="nav-item nav-level-4"><a class="nav-link" href="#搜索引擎模式分词"><span class="nav-number">2.1.3.</span> <span class="nav-text">搜索引擎模式分词</span></a></li>
                          <li class="nav-item nav-level-4"><a class="nav-link" href="#词性标注"><span class="nav-number">2.1.4.</span> <span class="nav-text">词性标注</span></a></li>
                        </ol>
                      </li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#2-SnowNLP"><span class="nav-number">2.2.</span> <span class="nav-text">2. SnowNLP</span></a>
                        <ol class="nav-child">
                          <li class="nav-item nav-level-4"><a class="nav-link" href="#分词"><span class="nav-number">2.2.1.</span> <span class="nav-text">分词</span></a></li>
                        </ol>
                      </li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#3-THULAC"><span class="nav-number">2.3.</span> <span class="nav-text">3. THULAC</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#4-NLPIR"><span class="nav-number">2.4.</span> <span class="nav-text">4. NLPIR</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#5-NLTK"><span class="nav-number">2.5.</span> <span class="nav-text">5. NLTK</span></a></li>
                      <li class="nav-item nav-level-3"><a class="nav-link" href="#6-LTP"><span class="nav-number">2.6.</span> <span class="nav-text">6. LTP</span></a></li>
                    </ol>
                  </li>
                  <li class="nav-item nav-level-2"><a class="nav-link" href="#参考来源"><span class="nav-number">3.</span> <span class="nav-text">参考来源</span></a></li>
                </ol>
              </div>
            </div>
            <!--/noindex-->
            <div class="site-overview-wrap sidebar-panel">
              <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
                <img class="site-author-image" itemprop="image" alt="崔庆才" src="/images/avatar.png">
                <p class="site-author-name" itemprop="name">崔庆才</p>
                <div class="site-description" itemprop="description">崔庆才的个人站点，记录生活的瞬间，分享学习的心得。</div>
              </div>
              <div class="site-state-wrap motion-element">
                <nav class="site-state">
                  <div class="site-state-item site-state-posts">
                    <a href="/archives/">
                      <span class="site-state-item-count">608</span>
                      <span class="site-state-item-name">日志</span>
                    </a>
                  </div>
                  <div class="site-state-item site-state-categories">
                    <a href="/categories/">
                      <span class="site-state-item-count">24</span>
                      <span class="site-state-item-name">分类</span></a>
                  </div>
                  <div class="site-state-item site-state-tags">
                    <a href="/tags/">
                      <span class="site-state-item-count">156</span>
                      <span class="site-state-item-name">标签</span></a>
                  </div>
                </nav>
              </div>
              <div class="links-of-author motion-element">
                <span class="links-of-author-item">
                  <a href="https://github.com/Germey" title="GitHub → https:&#x2F;&#x2F;github.com&#x2F;Germey" rel="noopener" target="_blank"><i class="fab fa-github fa-fw"></i>GitHub</a>
                </span>
                <span class="links-of-author-item">
                  <a href="mailto:cqc@cuiqingcai.com" title="邮件 → mailto:cqc@cuiqingcai.com" rel="noopener" target="_blank"><i class="fa fa-envelope fa-fw"></i>邮件</a>
                </span>
                <span class="links-of-author-item">
                  <a href="https://weibo.com/cuiqingcai" title="微博 → https:&#x2F;&#x2F;weibo.com&#x2F;cuiqingcai" rel="noopener" target="_blank"><i class="fab fa-weibo fa-fw"></i>微博</a>
                </span>
                <span class="links-of-author-item">
                  <a href="https://www.zhihu.com/people/Germey" title="知乎 → https:&#x2F;&#x2F;www.zhihu.com&#x2F;people&#x2F;Germey" rel="noopener" target="_blank"><i class="fa fa-magic fa-fw"></i>知乎</a>
                </span>
              </div>
            </div>
            <div style=" width: 100%;" class="sidebar-panel sidebar-panel-image sidebar-panel-active">
              <a href="https://tutorial.lengyue.video/?coupon=12ef4b1a-a3db-11ea-bb37-0242ac130002_cqx_850" target="_blank" rel="noopener">
                <img src="https://qiniu.cuiqingcai.com/bco2a.png" alt="天验" style=" width: 100%;">
              </a>
            </div>
            <div style=" width: 100%;" class="sidebar-panel sidebar-panel-image sidebar-panel-active">
              <a href="http://www.ipidea.net/?utm-source=cqc&utm-keyword=?cqc" target="_blank" rel="noopener">
                <img src="https://qiniu.cuiqingcai.com/0ywun.png" alt="IPIDEA" style=" width: 100%;">
              </a>
            </div>
            <div class="sidebar-panel sidebar-panel-tags sidebar-panel-active">
              <h4 class="name"> 标签云 </h4>
              <div class="content">
                <a href="/tags/2048/" style="font-size: 10px;">2048</a> <a href="/tags/API/" style="font-size: 10px;">API</a> <a href="/tags/Bootstrap/" style="font-size: 11.25px;">Bootstrap</a> <a href="/tags/CDN/" style="font-size: 10px;">CDN</a> <a href="/tags/CQC/" style="font-size: 10px;">CQC</a> <a href="/tags/CSS/" style="font-size: 10px;">CSS</a> <a href="/tags/CSS-%E5%8F%8D%E7%88%AC%E8%99%AB/" style="font-size: 10px;">CSS 反爬虫</a> <a href="/tags/CV/" style="font-size: 10px;">CV</a> <a href="/tags/Django/" style="font-size: 10px;">Django</a> <a href="/tags/Eclipse/" style="font-size: 11.25px;">Eclipse</a> <a href="/tags/FTP/" style="font-size: 10px;">FTP</a> <a href="/tags/Git/" style="font-size: 10px;">Git</a> <a href="/tags/GitHub/" style="font-size: 13.75px;">GitHub</a> <a href="/tags/HTML5/" style="font-size: 10px;">HTML5</a> <a href="/tags/Hexo/" style="font-size: 10px;">Hexo</a> <a href="/tags/IT/" style="font-size: 10px;">IT</a> <a href="/tags/JSP/" style="font-size: 10px;">JSP</a> <a href="/tags/JavaScript/" style="font-size: 10px;">JavaScript</a> <a href="/tags/K8s/" style="font-size: 10px;">K8s</a> <a href="/tags/LOGO/" style="font-size: 10px;">LOGO</a> <a href="/tags/Linux/" style="font-size: 10px;">Linux</a> <a href="/tags/MIUI/" style="font-size: 10px;">MIUI</a> <a href="/tags/MongoDB/" style="font-size: 10px;">MongoDB</a> <a href="/tags/Mysql/" style="font-size: 10px;">Mysql</a> <a href="/tags/NBA/" style="font-size: 10px;">NBA</a> <a href="/tags/PHP/" style="font-size: 11.25px;">PHP</a> <a href="/tags/PS/" style="font-size: 10px;">PS</a> <a href="/tags/Pathlib/" style="font-size: 10px;">Pathlib</a> <a href="/tags/PhantomJS/" style="font-size: 10px;">PhantomJS</a> <a href="/tags/Python/" style="font-size: 15px;">Python</a> <a href="/tags/Python3/" style="font-size: 12.5px;">Python3</a> <a href="/tags/Pythonic/" style="font-size: 10px;">Pythonic</a> <a href="/tags/QQ/" style="font-size: 10px;">QQ</a> <a href="/tags/Redis/" style="font-size: 
10px;">Redis</a> <a href="/tags/SAE/" style="font-size: 10px;">SAE</a> <a href="/tags/SSH/" style="font-size: 10px;">SSH</a> <a href="/tags/SVG/" style="font-size: 10px;">SVG</a> <a href="/tags/Scrapy/" style="font-size: 10px;">Scrapy</a> <a href="/tags/Scrapy-redis/" style="font-size: 10px;">Scrapy-redis</a> <a href="/tags/Scrapy%E5%88%86%E5%B8%83%E5%BC%8F/" style="font-size: 10px;">Scrapy分布式</a> <a href="/tags/Selenium/" style="font-size: 10px;">Selenium</a> <a href="/tags/TKE/" style="font-size: 10px;">TKE</a> <a href="/tags/Ubuntu/" style="font-size: 11.25px;">Ubuntu</a> <a href="/tags/VS-Code/" style="font-size: 10px;">VS Code</a> <a href="/tags/Vs-Code/" style="font-size: 10px;">Vs Code</a> <a href="/tags/Vue/" style="font-size: 11.25px;">Vue</a> <a href="/tags/Webpack/" style="font-size: 10px;">Webpack</a> <a href="/tags/Windows/" style="font-size: 10px;">Windows</a> <a href="/tags/Winpcap/" style="font-size: 10px;">Winpcap</a> <a href="/tags/WordPress/" style="font-size: 13.75px;">WordPress</a> <a href="/tags/Youtube/" style="font-size: 11.25px;">Youtube</a> <a href="/tags/android/" style="font-size: 10px;">android</a> <a href="/tags/ansible/" style="font-size: 10px;">ansible</a> <a href="/tags/cocos2d-x/" style="font-size: 10px;">cocos2d-x</a> <a href="/tags/e6/" style="font-size: 10px;">e6</a> <a href="/tags/fitvids/" style="font-size: 10px;">fitvids</a> <a href="/tags/git/" style="font-size: 11.25px;">git</a> <a href="/tags/json/" style="font-size: 10px;">json</a> <a href="/tags/js%E9%80%86%E5%90%91/" style="font-size: 10px;">js逆向</a> <a href="/tags/kubernetes/" style="font-size: 10px;">kubernetes</a> <a href="/tags/log/" style="font-size: 10px;">log</a> <a href="/tags/logging/" style="font-size: 10px;">logging</a> <a href="/tags/matlab/" style="font-size: 11.25px;">matlab</a> <a href="/tags/python/" style="font-size: 20px;">python</a> <a href="/tags/pytube/" style="font-size: 11.25px;">pytube</a> <a href="/tags/pywin32/" style="font-size: 
10px;">pywin32</a> <a href="/tags/style/" style="font-size: 10px;">style</a> <a href="/tags/tomcat/" style="font-size: 10px;">tomcat</a> <a href="/tags/ubuntu/" style="font-size: 10px;">ubuntu</a> <a href="/tags/uwsgi/" style="font-size: 10px;">uwsgi</a> <a href="/tags/vsftpd/" style="font-size: 10px;">vsftpd</a> <a href="/tags/wamp/" style="font-size: 10px;">wamp</a> <a href="/tags/wineQQ/" style="font-size: 10px;">wineQQ</a> <a href="/tags/%E4%B8%83%E7%89%9B/" style="font-size: 11.25px;">七牛</a> <a href="/tags/%E4%B8%8A%E6%B5%B7/" style="font-size: 10px;">上海</a> <a href="/tags/%E4%B8%AA%E4%BA%BA%E7%BD%91%E7%AB%99/" style="font-size: 10px;">个人网站</a> <a href="/tags/%E4%B8%BB%E9%A2%98/" style="font-size: 10px;">主题</a> <a href="/tags/%E4%BA%91%E4%BA%A7%E5%93%81/" style="font-size: 10px;">云产品</a> <a href="/tags/%E4%BA%91%E5%AD%98%E5%82%A8/" style="font-size: 10px;">云存储</a> <a href="/tags/%E4%BA%AC%E4%B8%9C%E4%BA%91/" style="font-size: 10px;">京东云</a> <a href="/tags/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD/" style="font-size: 12.5px;">人工智能</a> <a href="/tags/%E4%BB%A3%E7%90%86/" style="font-size: 10px;">代理</a> <a href="/tags/%E4%BB%A3%E7%A0%81/" style="font-size: 10px;">代码</a> <a href="/tags/%E4%BB%A3%E7%A0%81%E5%88%86%E4%BA%AB%E5%9B%BE/" style="font-size: 10px;">代码分享图</a> <a href="/tags/%E4%BC%98%E5%8C%96/" style="font-size: 10px;">优化</a> <a href="/tags/%E4%BD%8D%E8%BF%90%E7%AE%97/" style="font-size: 10px;">位运算</a> <a href="/tags/%E5%85%AC%E4%BC%97%E5%8F%B7/" style="font-size: 10px;">公众号</a> <a href="/tags/%E5%88%86%E4%BA%AB/" style="font-size: 10px;">分享</a> <a href="/tags/%E5%88%86%E5%B8%83%E5%BC%8F/" style="font-size: 10px;">分布式</a> <a href="/tags/%E5%88%9B%E4%B8%9A/" style="font-size: 10px;">创业</a> <a href="/tags/%E5%89%8D%E7%AB%AF/" style="font-size: 12.5px;">前端</a> <a href="/tags/%E5%8D%9A%E5%AE%A2/" style="font-size: 10px;">博客</a> <a href="/tags/%E5%8E%9F%E7%94%9FAPP/" style="font-size: 10px;">原生APP</a> <a href="/tags/%E5%8F%8D%E7%88%AC%E8%99%AB/" style="font-size: 
12.5px;">反爬虫</a> <a href="/tags/%E5%91%BD%E4%BB%A4/" style="font-size: 10px;">命令</a> <a href="/tags/%E5%93%8D%E5%BA%94%E5%BC%8F%E5%B8%83%E5%B1%80/" style="font-size: 10px;">响应式布局</a> <a href="/tags/%E5%9E%83%E5%9C%BE%E9%82%AE%E4%BB%B6/" style="font-size: 10px;">垃圾邮件</a> <a href="/tags/%E5%9F%9F%E5%90%8D%E7%BB%91%E5%AE%9A/" style="font-size: 10px;">域名绑定</a> <a href="/tags/%E5%A4%8D%E7%9B%98/" style="font-size: 10px;">复盘</a> <a href="/tags/%E5%A4%A7%E4%BC%97%E7%82%B9%E8%AF%84/" style="font-size: 10px;">大众点评</a> <a href="/tags/%E5%AD%97%E4%BD%93%E5%8F%8D%E7%88%AC%E8%99%AB/" style="font-size: 10px;">字体反爬虫</a> <a href="/tags/%E5%AD%97%E7%AC%A6%E9%97%AE%E9%A2%98/" style="font-size: 10px;">字符问题</a> <a href="/tags/%E5%AD%A6%E4%B9%A0%E6%96%B9%E6%B3%95/" style="font-size: 10px;">学习方法</a> <a href="/tags/%E5%AE%89%E5%8D%93/" style="font-size: 10px;">安卓</a> <a href="/tags/%E5%AE%9E%E7%94%A8/" style="font-size: 10px;">实用</a> <a href="/tags/%E5%B0%81%E9%9D%A2/" style="font-size: 10px;">封面</a> <a href="/tags/%E5%B4%94%E5%BA%86%E6%89%8D/" style="font-size: 18.75px;">崔庆才</a> <a href="/tags/%E5%B7%A5%E5%85%B7/" style="font-size: 12.5px;">工具</a> <a href="/tags/%E5%BC%80%E5%8F%91%E5%B7%A5%E5%85%B7/" style="font-size: 10px;">开发工具</a> <a href="/tags/%E5%BE%AE%E8%BD%AF/" style="font-size: 10px;">微软</a> <a href="/tags/%E6%80%9D%E8%80%83/" style="font-size: 10px;">思考</a> <a href="/tags/%E6%89%8B%E6%9C%BA%E8%AE%BF%E9%97%AE/" style="font-size: 10px;">手机访问</a> <a href="/tags/%E6%95%99%E7%A8%8B/" style="font-size: 10px;">教程</a> <a href="/tags/%E6%95%99%E8%82%B2/" style="font-size: 10px;">教育</a> <a href="/tags/%E6%96%B0%E4%B9%A6/" style="font-size: 12.5px;">新书</a> <a href="/tags/%E6%96%B9%E6%B3%95%E8%AE%BA/" style="font-size: 10px;">方法论</a> <a href="/tags/%E6%97%85%E6%B8%B8/" style="font-size: 10px;">旅游</a> <a href="/tags/%E6%97%A5%E5%BF%97/" style="font-size: 10px;">日志</a> <a href="/tags/%E6%9A%97%E6%97%B6%E9%97%B4/" style="font-size: 10px;">暗时间</a> <a href="/tags/%E6%9D%9C%E5%85%B0%E7%89%B9/" 
style="font-size: 11.25px;">杜兰特</a> <a href="/tags/%E6%A1%8C%E9%9D%A2/" style="font-size: 10px;">桌面</a> <a href="/tags/%E6%AD%8C%E5%8D%95/" style="font-size: 10px;">歌单</a> <a href="/tags/%E6%B1%9F%E5%8D%97/" style="font-size: 10px;">江南</a> <a href="/tags/%E6%B8%B8%E6%88%8F/" style="font-size: 10px;">游戏</a> <a href="/tags/%E7%84%A6%E8%99%91/" style="font-size: 10px;">焦虑</a> <a href="/tags/%E7%88%AC%E8%99%AB/" style="font-size: 16.25px;">爬虫</a> <a href="/tags/%E7%88%AC%E8%99%AB%E4%B9%A6%E7%B1%8D/" style="font-size: 11.25px;">爬虫书籍</a> <a href="/tags/%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F/" style="font-size: 10px;">环境变量</a> <a href="/tags/%E7%94%9F%E6%B4%BB%E7%AC%94%E8%AE%B0/" style="font-size: 10px;">生活笔记</a> <a href="/tags/%E7%99%BB%E5%BD%95/" style="font-size: 10px;">登录</a> <a href="/tags/%E7%9F%A5%E4%B9%8E/" style="font-size: 10px;">知乎</a> <a href="/tags/%E7%9F%AD%E4%BF%A1/" style="font-size: 10px;">短信</a> <a href="/tags/%E7%9F%AD%E4%BF%A1%E9%AA%8C%E8%AF%81%E7%A0%81/" style="font-size: 10px;">短信验证码</a> <a href="/tags/%E7%AC%94%E8%AE%B0%E8%BD%AF%E4%BB%B6/" style="font-size: 10px;">笔记软件</a> <a href="/tags/%E7%AF%AE%E7%BD%91/" style="font-size: 10px;">篮网</a> <a href="/tags/%E7%BA%B8%E5%BC%A0/" style="font-size: 10px;">纸张</a> <a href="/tags/%E7%BB%84%E4%BB%B6/" style="font-size: 10px;">组件</a> <a href="/tags/%E7%BD%91%E7%AB%99/" style="font-size: 10px;">网站</a> <a href="/tags/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/" style="font-size: 11.25px;">网络爬虫</a> <a href="/tags/%E7%BE%8E%E5%AD%A6/" style="font-size: 10px;">美学</a> <a href="/tags/%E8%82%89%E5%A4%B9%E9%A6%8D/" style="font-size: 10px;">肉夹馍</a> <a href="/tags/%E8%85%BE%E8%AE%AF%E4%BA%91/" style="font-size: 10px;">腾讯云</a> <a href="/tags/%E8%87%AA%E5%BE%8B/" style="font-size: 10px;">自律</a> <a href="/tags/%E8%A5%BF%E5%B0%91%E7%88%B7/" style="font-size: 10px;">西少爷</a> <a href="/tags/%E8%A7%86%E9%A2%91/" style="font-size: 10px;">视频</a> <a href="/tags/%E8%B0%B7%E6%AD%8C%E9%AA%8C%E8%AF%81%E7%A0%81/" style="font-size: 
10px;">谷歌验证码</a> <a href="/tags/%E8%BF%90%E8%90%A5/" style="font-size: 10px;">运营</a> <a href="/tags/%E8%BF%9C%E7%A8%8B/" style="font-size: 10px;">远程</a> <a href="/tags/%E9%80%86%E5%90%91/" style="font-size: 10px;">逆向</a> <a href="/tags/%E9%85%8D%E7%BD%AE/" style="font-size: 10px;">配置</a> <a href="/tags/%E9%87%8D%E8%A3%85/" style="font-size: 10px;">重装</a> <a href="/tags/%E9%98%BF%E6%9D%9C/" style="font-size: 10px;">阿杜</a> <a href="/tags/%E9%9D%99%E8%A7%85/" style="font-size: 17.5px;">静觅</a> <a href="/tags/%E9%A2%A0%E8%A6%86/" style="font-size: 10px;">颠覆</a> <a href="/tags/%E9%A3%9E%E4%BF%A1/" style="font-size: 10px;">飞信</a> <a href="/tags/%E9%B8%BF%E8%92%99/" style="font-size: 10px;">鸿蒙</a>
              </div>
              <script>
                // Palette from which each tag-cloud link gets its background colour.
                const tagsColors = ['#00a67c', '#5cb85c', '#d9534f', '#567e95', '#b37333', '#f4843d', '#15a287'];
                // All tag links inside the tag-cloud sidebar panel.
                const tagsElements = document.querySelectorAll('.sidebar-panel-tags .content a');
                // Pick one random palette entry per link and apply it.
                for (const tagLink of tagsElements) {
                  const pick = Math.floor(Math.random() * tagsColors.length);
                  tagLink.style.backgroundColor = tagsColors[pick];
                }

              </script>
            </div>
            <div class="sidebar-panel sidebar-panel-categories sidebar-panel-active">
              <h4 class="name"> 分类 </h4>
              <div class="content">
                <ul class="category-list">
                  <li class="category-list-item"><a class="category-list-link" href="/categories/C-C/">C/C++</a><span class="category-list-count">23</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/HTML/">HTML</a><span class="category-list-count">14</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Java/">Java</a><span class="category-list-count">5</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/JavaScript/">JavaScript</a><span class="category-list-count">26</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Linux/">Linux</a><span class="category-list-count">15</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Markdown/">Markdown</a><span class="category-list-count">1</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Net/">Net</a><span class="category-list-count">4</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Other/">Other</a><span class="category-list-count">39</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/PHP/">PHP</a><span class="category-list-count">27</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Paper/">Paper</a><span class="category-list-count">2</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/Python/">Python</a><span class="category-list-count">261</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/TypeScript/">TypeScript</a><span class="category-list-count">2</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E4%B8%AA%E4%BA%BA%E5%B1%95%E7%A4%BA/">个人展示</a><span class="category-list-count">1</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E4%B8%AA%E4%BA%BA%E6%97%A5%E8%AE%B0/">个人日记</a><span class="category-list-count">9</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E4%B8%AA%E4%BA%BA%E8%AE%B0%E5%BD%95/">个人记录</a><span class="category-list-count">4</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E4%B8%AA%E4%BA%BA%E9%9A%8F%E7%AC%94/">个人随笔</a><span class="category-list-count">15</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E5%AE%89%E8%A3%85%E9%85%8D%E7%BD%AE/">安装配置</a><span class="category-list-count">59</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E6%8A%80%E6%9C%AF%E6%9D%82%E8%B0%88/">技术杂谈</a><span class="category-list-count">88</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E6%9C%AA%E5%88%86%E7%B1%BB/">未分类</a><span class="category-list-count">1</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E7%94%9F%E6%B4%BB%E7%AC%94%E8%AE%B0/">生活笔记</a><span class="category-list-count">1</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E7%A6%8F%E5%88%A9%E4%B8%93%E5%8C%BA/">福利专区</a><span class="category-list-count">6</span></li>
                  <li class="category-list-item"><a class="category-list-link" href="/categories/%E8%81%8C%E4%BD%8D%E6%8E%A8%E8%8D%90/">职位推荐</a><span class="category-list-count">2</span></li>
                </ul>
              </div>
            </div>
            <div class="sidebar-panel sidebar-panel-friends sidebar-panel-active">
              <h4 class="name"> 友情链接 </h4>
              <ul class="friends">
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/j2dub.jpg">
                  </span>
                  <span class="link">
                    <a href="https://www.findhao.net/" target="_blank" rel="noopener">FindHao</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/ou6mm.jpg">
                  </span>
                  <span class="link">
                    <a href="https://diygod.me/" target="_blank" rel="noopener">DIYgod</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/6apxu.jpg">
                  </span>
                  <span class="link">
                    <a href="https://www.51dev.com/" target="_blank" rel="noopener">IT技术社区</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://www.jankl.com/img/titleshu.jpg">
                  </span>
                  <span class="link">
                    <a href="https://www.jankl.com/" target="_blank" rel="noopener">liberalist</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/bqlbs.png">
                  </span>
                  <span class="link">
                    <a href="http://www.urselect.com/" target="_blank" rel="noopener">优社电商</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/8s88c.jpg">
                  </span>
                  <span class="link">
                    <a href="https://www.yuanrenxue.com/" target="_blank" rel="noopener">猿人学</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/2wgg5.jpg">
                  </span>
                  <span class="link">
                    <a href="https://www.yunlifang.cn/" target="_blank" rel="noopener">云立方</a>
                  </span>
                </li>
                <!-- Friend links. Each logo is decorative (the adjacent link text names the site), so every img carries an empty alt. -->
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/shwr6.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://lanbing510.info/" target="_blank" rel="noopener">冰蓝</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/blvoh.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://lengyue.me/" target="_blank" rel="noopener">冷月</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="http://qianxunclub.com/favicon.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://qianxunclub.com/" target="_blank" rel="noopener">千寻啊千寻</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/0044u.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="http://kodcloud.com/" target="_blank" rel="noopener">可道云</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/ygnpn.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="http://www.kunkundashen.cn/" target="_blank" rel="noopener">坤坤大神</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/22uv1.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://www.cenchong.com/" target="_blank" rel="noopener">岑冲博客</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/ev9kl.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://www.zxiaoji.com/" target="_blank" rel="noopener">张小鸡</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://www.503error.com/favicon.ico" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.503error.com/" target="_blank" rel="noopener">张志明个人博客</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/x714o.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="http://www.hubwiz.com/" target="_blank" rel="noopener">汇智网</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/129d8.png" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.bysocket.com/" target="_blank" rel="noopener">泥瓦匠BYSocket</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://www.xiongge.club/favicon.ico" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.xiongge.club/" target="_blank" rel="noopener">熊哥club</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/3w4fe.png" alt="">
                  </span>
                  <span class="link">
                    <a href="https://zerlong.com/" target="_blank" rel="noopener">知语</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/44hxf.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://redstonewill.com/" target="_blank" rel="noopener">红色石头</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/8g1fk.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="http://www.laodong.me/" target="_blank" rel="noopener">老董博客</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/wkaus.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://zhaoshuai.me/" target="_blank" rel="noopener">碎念</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/pgo0r.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.chenwenguan.com/" target="_blank" rel="noopener">陈文管的博客</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/kk82a.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.lxlinux.net/" target="_blank" rel="noopener">良许Linux教程网</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/lj0t2.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://tanqingbo.cn/" target="_blank" rel="noopener">IT码农</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/i8cdr.png" alt="">
                  </span>
                  <span class="link">
                    <a href="https://junyiseo.com/" target="_blank" rel="noopener">均益个人博客</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/chwv2.png" alt="">
                  </span>
                  <span class="link">
                    <a href="https://brucedone.com/" target="_blank" rel="noopener">大鱼的鱼塘</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/2y43o.png" alt="">
                  </span>
                  <span class="link">
                    <a href="http://bbs.nightteam.cn/" target="_blank" rel="noopener">夜幕爬虫安全论坛</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/zvc3w.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://www.weishidong.com/" target="_blank" rel="noopener">韦世东的技术专栏</a>
                  </span>
                </li>
                <li class="friend">
                  <span class="logo">
                    <img src="https://qiniu.cuiqingcai.com/ebudy.jpg" alt="">
                  </span>
                  <span class="link">
                    <a href="https://chuanjiabing.com/" target="_blank" rel="noopener">穿甲兵技术社区</a>
                  </span>
                </li>
              </ul>
            </div>
          </div>
        </aside>
        <div id="sidebar-dimmer"></div>
      </div>
    </main>
    <footer class="footer">
      <div class="footer-inner">
        <div class="copyright"> &copy; <span itemprop="copyrightYear">2021</span>
          <span class="with-love">
            <i class="fa fa-heart"></i>
          </span>
          <span class="author" itemprop="copyrightHolder">崔庆才丨静觅</span>
          <span class="post-meta-divider">|</span>
          <span class="post-meta-item-icon">
            <i class="fa fa-chart-area"></i>
          </span>
          <span title="站点总字数">2.6m</span>
          <span class="post-meta-divider">|</span>
          <span class="post-meta-item-icon">
            <i class="fa fa-coffee"></i>
          </span>
          <span title="站点阅读时长">39:54</span>
        </div>
        <div class="powered-by">由 <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> &amp; <a href="https://pisces.theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Pisces</a> 强力驱动 </div>
        <div class="beian"><a href="https://beian.miit.gov.cn/" rel="noopener" target="_blank">京ICP备18015597号-1 </a>
        </div>
        <script>
          (function ()
          {
            /**
             * Find the element that displays the visit count for a page.
             *
             * @param {string} url - Page URL; its URI-encoded form is used as the id
             *   of the container element to look up.
             * @returns {Element|null} The `.leancloud-visitors-count` descendant of
             *   that container.
             */
            function leancloudSelector(url)
            {
              const containerId = encodeURI(url);
              const container = document.getElementById(containerId);
              return container.querySelector('.leancloud-visitors-count');
            }

            /**
             * Record one visit for the current post and display the updated total.
             *
             * The post's Counter record is looked up by URL. When a record exists,
             * the displayed value is bumped immediately and an increment operation
             * is sent to the server; otherwise a new record is created with a
             * count of 1 and that value is displayed once the request resolves.
             *
             * @param {Function} Counter - Request helper called as
             *   Counter(method, path, data); must return a promise resolving to an
             *   object with a json() method.
             */
            function addCount(Counter)
            {
              var visitors = document.querySelector('.leancloud_visitors');
              // A page rendered without the visitor widget has nothing to count;
              // return quietly instead of throwing on a null element.
              if (!visitors) return;
              var url = decodeURI(visitors.id);
              var title = visitors.dataset.flagTitle;
              var query = encodeURIComponent(JSON.stringify(
              {
                url
              }));
              Counter('get', '/classes/Counter?where=' + query).then(response => response.json()).then((
              {
                results
              }) =>
              {
                if (results.length > 0)
                {
                  var counter = results[0];
                  // Show the new total right away; the server-side increment is
                  // sent afterwards and only logged if it fails.
                  leancloudSelector(url).innerText = counter.time + 1;
                  Counter('put', '/classes/Counter/' + counter.objectId,
                  {
                    time:
                    {
                      '__op': 'Increment',
                      'amount': 1
                    }
                  }).catch(error =>
                  {
                    console.error('Failed to save visitor count', error);
                  });
                }
                else
                {
                  // First recorded visit for this URL: create the record.
                  Counter('post', '/classes/Counter',
                  {
                    title,
                    url,
                    time: 1
                  }).then(response => response.json()).then(() =>
                  {
                    leancloudSelector(url).innerText = 1;
                  }).catch(error =>
                  {
                    console.error('Failed to create', error);
                  });
                }
              }).catch(error =>
              {
                console.error('LeanCloud Counter Error', error);
              });
            }

            /**
             * Fill in the visit count of every listed post without incrementing it.
             *
             * Collects the decoded ids of all `.leancloud_visitors` elements, asks
             * the server for the Counter records whose url is among them in a
             * single request, and writes each record's `time` (or 0 when no record
             * matches) into the corresponding counter element.
             *
             * @param {Function} Counter - Request helper called as
             *   Counter(method, path); must return a promise resolving to an object
             *   with a json() method.
             */
            function showTime(Counter)
            {
              const urls = Array.from(document.querySelectorAll('.leancloud_visitors'), node => decodeURI(node.id));
              const where = encodeURIComponent(JSON.stringify(
              {
                url:
                {
                  '$in': urls
                }
              }));
              Counter('get', '/classes/Counter?where=' + where)
                .then(response => response.json())
                .then((
                {
                  results
                }) =>
                {
                  urls.forEach(url =>
                  {
                    const record = results.find(item => item.url === url);
                    leancloudSelector(url).innerText = record ? record.time : 0;
                  });
                })
                .catch(error =>
                {
                  console.error('LeanCloud Counter Error', error);
                });
            }
            // LeanCloud settings for this site. Only the application id, the
            // application key and the server URL are read by the code below.
            const leancloudSettings = {
              "enable": true,
              "app_id": "6X5dRQ0pnPWJgYy8SXOg0uID-gzGzoHsz",
              "app_key": "ziLDVEy73ne5HtFTiGstzHMS",
              "server_url": "https://6x5drq0p.lc-cn-n1-shared.com",
              "security": false
            };
            let app_id = leancloudSettings.app_id;
            let app_key = leancloudSettings.app_key;
            let server_url = leancloudSettings.server_url;

            /**
             * Build the request helper for the given API server and run the
             * counter routine that fits the current page.
             *
             * On a post page the visit is recorded via addCount, but only when the
             * configured hostname equals the current location's hostname. On any
             * other page, showTime runs when at least one `.post-title-link`
             * element is present.
             *
             * @param {string} api_server - Base URL of the API server; requests go
             *   to `${api_server}/1.1<path>`.
             */
            function fetchData(api_server)
            {
              // Sends a JSON request carrying the application id and key headers.
              const request = (method, url, data) => fetch(`${api_server}/1.1${url}`,
              {
                method,
                headers:
                {
                  'X-LC-Id': app_id,
                  'X-LC-Key': app_key,
                  'Content-Type': 'application/json',
                },
                body: JSON.stringify(data)
              });

              if (!CONFIG.page.isPost)
              {
                const hasPostLinks = document.querySelectorAll('.post-title-link').length >= 1;
                if (hasPostLinks) showTime(request);
                return;
              }
              if (CONFIG.hostname === location.hostname) addCount(request);
            }
            // Choose the API endpoint. App ids ending in '-MdYXbMMI' use a host
            // derived from the first eight characters of the id (presumably
            // LeanCloud's international region — confirm); every other id uses the
            // configured server_url.
            let api_server = app_id.slice(-9) !== '-MdYXbMMI' ? server_url : `https://${app_id.slice(0, 8).toLowerCase()}.api.lncldglobal.com`;
            if (api_server)
            {
              fetchData(api_server);
            }
            else
            {
              // No server URL configured: ask the LeanCloud app router for the host.
              fetch('https://app-router.leancloud.cn/2/route?appId=' + app_id).then(response => response.json()).then((
              {
                api_server
              }) =>
              {
                fetchData('https://' + api_server);
              }).catch(error =>
              {
                // Log router failures like the other counter requests do,
                // instead of leaving an unhandled promise rejection.
                console.error('LeanCloud Counter Error', error);
              });
            }
          })();

        </script>
      </div>
      <div class="footer-stat">
        <span id="cnzz_stat_icon_1279355174"></span>
        <script type="text/javascript">
          // Writes the CNZZ statistics widget into the page while it is being parsed.
          // The percent-encoded string decodes to a span element with the id
          // cnzz_stat_icon_1279355174 followed by a script element that loads
          // https://v1.cnzz.com/z_stat.php?id=1279355174&online=1&show=line.
          // The markup is presumably kept encoded so that no literal closing script
          // tag appears inside this inline script.
          // NOTE(review): the written span has the same id as the span placed just
          // before this script, so that id occurs twice in the document; confirm
          // which element the CNZZ script is meant to target.
          document.write(unescape("%3Cspan id='cnzz_stat_icon_1279355174'%3E%3C/span%3E%3Cscript src='https://v1.cnzz.com/z_stat.php%3Fid%3D1279355174%26online%3D1%26show%3Dline' type='text/javascript'%3E%3C/script%3E"));

        </script>
      </div>
    </footer>
  </div>
  <script src="//cdn.jsdelivr.net/npm/animejs@3.2.1/lib/anime.min.js"></script>
  <script src="//cdn.jsdelivr.net/npm/pangu@4/dist/browser/pangu.min.js"></script>
  <script src="/js/utils.js"></script>
  <script src="/.js"></script>
  <script src="/js/schemes/pisces.js"></script>
  <script src="/.js"></script>
  <script src="/js/next-boot.js"></script>
  <script src="/.js"></script>
  <script>
    /**
     * Baidu link-push beacon.
     *
     * Determines the page's canonical URL (the last link element with
     * rel=canonical and an href, falling back to the current location) and,
     * unless that URL is on a baidu.com subdomain, requests a tracking GIF
     * from Baidu with the referrer and the page URL in the query string.
     */
    (function ()
    {
      var canonicalURL, curProtocol;
      var links = document.getElementsByTagName("link");
      // Keep the last canonical link that carries an href. The index is
      // declared locally so it no longer leaks onto window as an implicit global.
      for (var idx = 0; idx < links.length; idx++)
      {
        if (links[idx].rel.toLowerCase() == 'canonical' && links[idx].href)
        {
          canonicalURL = links[idx].href;
        }
      }
      // The protocol comes from the canonical URL when there is one, otherwise
      // from the current location, which then also serves as the page URL.
      if (!canonicalURL)
      {
        curProtocol = window.location.protocol.split(':')[0];
        canonicalURL = window.location.href;
      }
      else
      {
        curProtocol = canonicalURL.split(':')[0];
      }
      // Matches http:// or https:// followed by a baidu.com subdomain. The
      // previous pattern used a character class ([http|https]) where an
      // alternation was intended, plus a needless global flag.
      var baiduHost = /(https?:\/\/[a-zA-Z0-9\_\.]+\.baidu\.com)/i;
      if (baiduHost.test(canonicalURL)) return;

      var beaconURL = (String(curProtocol).toLowerCase() === 'https') ? "https://sp0.baidu.com/9_Q4simg2RQJ8t7jm9iCKT-xh_/s.gif" : "//api.share.baidu.com/s.gif";
      if (document.referrer)
      {
        beaconURL += "?r=" + encodeURIComponent(document.referrer);
        if (canonicalURL) beaconURL += "&l=" + canonicalURL;
      }
      else if (canonicalURL)
      {
        beaconURL += "?l=" + canonicalURL;
      }
      // Assigning src to a detached image issues the GET request.
      var pixel = new Image();
      pixel.src = beaconURL;
    })();

  </script>
  <script src="/js/local-search.js"></script>
  <script src="/.js"></script>
  <link rel="stylesheet" href="//cdn.jsdelivr.net/npm/gitalk@1/dist/gitalk.min.css">
  <script>
    // Set up the Gitalk comment widget in #gitalk-container. The exact timing
    // is decided by NexT.utils.loadComments (presumably deferred until the
    // container is needed — confirm in the NexT utilities).
    // NOTE(review): the OAuth client secret below is embedded in public page
    // source; confirm this is acceptable for this GitHub application.
    NexT.utils.loadComments(document.querySelector('#gitalk-container'), () =>
    {
      const mountGitalk = () =>
      {
        const options = {
          clientID: '4c86ce1d7c4fbb3b277c',
          clientSecret: '4927beb0f90e2c07e66c99d9d2529cf3eb8ac8e4',
          repo: 'Blog',
          owner: 'germey',
          admin: ['germey'],
          id: '64b1353b209ca5ee9b70a354239ce09d',
          language: 'zh-CN',
          distractionFreeMode: true
        };
        new Gitalk(options).render('gitalk-container');
      };
      // The third argument is passed through unchanged; presumably it lets
      // getScript skip the download when Gitalk is already defined — confirm.
      NexT.utils.getScript('//cdn.jsdelivr.net/npm/gitalk@1/dist/gitalk.min.js', mountGitalk, window.Gitalk);
    });

  </script>
</body>

</html>
